#!/usr/bin/env python3
"""
Example usage of the Khmer OCR Recognition Model
Demonstrates how to use the model for Khmer text recognition
"""

from paddleocr import PaddleOCR
import cv2
import os
import json

def khmer_ocr_example(image_path, model_dir=".", ocr=None):
    """
    Run Khmer OCR on a single image and print a per-region report.

    Args:
        image_path (str): Path to the image containing Khmer text
        model_dir (str): Directory containing the recognition model files and
            'khmer_char_dict.txt'; only used when `ocr` is not supplied
        ocr: Optional, already constructed engine exposing
            `ocr(image_path, cls=True)`. Pass one to reuse a loaded model
            across several calls; when None, a new PaddleOCR instance is
            built from `model_dir`.

    Returns:
        list | None: One dict per detected region with the keys 'region_id',
        'text', 'confidence' and 'bounding_box'. An empty list when no text
        is detected. None when the image file is missing, the model cannot
        be loaded, or recognition raises.
    """

    print(f"🔍 Processing: {image_path}")
    print("=" * 50)

    # Validate the input first so a bad path never pays for a model load.
    if not os.path.isfile(image_path):
        print(f"❌ Image file not found: {image_path}")
        return None

    # Initialize PaddleOCR with the custom Khmer model unless the caller
    # supplied a ready-made engine.
    if ocr is None:
        try:
            ocr = PaddleOCR(
                use_angle_cls=True,
                lang='ch',  # Use Chinese as base language
                rec_model_dir=model_dir,  # Directory with inference files
                rec_char_dict_path=os.path.join(model_dir, 'khmer_char_dict.txt'),
                show_log=False
            )
        except Exception as e:
            print(f"❌ Error loading model: {e}")
            return None
        print("✅ Model loaded successfully")

    # Process the image
    try:
        result = ocr.ocr(image_path, cls=True)
    except Exception as e:
        print(f"❌ Error processing image: {e}")
        return None
    print("✅ OCR processing completed")

    # Only the first element of the result is used (one image was passed in).
    # Guard against None / [] / [None] / [[]] so indexing can never raise.
    regions = result[0] if result else None
    if not regions:
        print("⚠️ No text detected in the image.")
        return []

    all_results = []

    print(f"\n📝 Detected Text Regions: {len(regions)}")
    print("-" * 50)

    for idx, line in enumerate(regions, start=1):
        # Coerce to built-in types so the records stay JSON-serialisable
        # even if the engine hands back NumPy scalars or arrays.
        box = [[float(coord) for coord in point] for point in line[0]]
        text = line[1][0]  # Recognized text
        confidence = float(line[1][1])  # Confidence score

        all_results.append({
            'region_id': idx,
            'text': text,
            'confidence': confidence,
            'bounding_box': box
        })

        # Display result; the box is summarised by its first and third corner.
        print(f"Region {idx}:")
        print(f"  📄 Text: {text}")
        print(f"  🎯 Confidence: {confidence:.3f}")
        print(f"  📍 Box: [{box[0][0]:.0f},{box[0][1]:.0f}] → [{box[2][0]:.0f},{box[2][1]:.0f}]")
        print()

    # Summary (all_results is non-empty here, so the division is safe)
    avg_confidence = sum(item['confidence'] for item in all_results) / len(all_results)
    print("📊 Summary:")
    print(f"  Total regions: {len(all_results)}")
    print(f"  Average confidence: {avg_confidence:.3f}")

    # Combine all text
    full_text = " ".join(item['text'] for item in all_results)
    print(f"  📝 Full text: {full_text}")

    return all_results

def batch_process_images(image_dir, model_dir=".", output_file="ocr_results.json"):
    """
    Run OCR on every image directly inside a directory and save the results.

    Files are matched by extension (case-insensitive) and processed in sorted
    name order so the output is reproducible. Sub-directories are not
    descended into. Images for which `khmer_ocr_example` returns nothing
    (error or no text) are left out of the output.

    Args:
        image_dir (str): Directory containing images
        model_dir (str): Directory containing model files
        output_file (str): Output JSON file for results; maps each image
            path to its list of region dicts

    Returns:
        None. Nothing is written when no image files are found.
    """

    print(f"🔄 Batch processing images from: {image_dir}")

    # Find image files. A tuple lets str.endswith test every suffix at once.
    image_extensions = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')
    image_files = []

    if os.path.isdir(image_dir):
        for name in sorted(os.listdir(image_dir)):
            candidate = os.path.join(image_dir, name)
            # Require a regular file: a folder called "scans.jpg" is not an image.
            if name.lower().endswith(image_extensions) and os.path.isfile(candidate):
                image_files.append(candidate)

    if not image_files:
        print(f"❌ No image files found in {image_dir}")
        return

    print(f"📁 Found {len(image_files)} images")

    all_results = {}

    # NOTE: called with only (image_path, model_dir), khmer_ocr_example builds
    # its own PaddleOCR instance, so the model is loaded once per image.
    for image_path in image_files:
        print(f"\n🖼️ Processing: {os.path.basename(image_path)}")
        results = khmer_ocr_example(image_path, model_dir)
        if results:
            all_results[image_path] = results

    # Save results to JSON; ensure_ascii=False keeps Khmer text readable.
    try:
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(all_results, f, ensure_ascii=False, indent=2)
    except (OSError, TypeError, ValueError) as e:
        # OSError: file cannot be written; TypeError/ValueError: json.dump
        # met a value it cannot serialise.
        print(f"❌ Error saving results: {e}")
    else:
        print(f"\n💾 Results saved to: {output_file}")

def main():
    """
    Run the three demo sections and print their output.

    1. OCR on 'sample_khmer_image.jpg' if it exists in the working directory.
    2. Batch OCR over the 'sample_images' directory if it exists.
    3. A listing of the expected model files in the working directory with
       their sizes, plus the entry count of 'khmer_char_dict.txt'.

    Missing sample inputs are reported and skipped rather than treated as
    errors.
    """

    print("🇰🇭 Khmer OCR Recognition Model - Example Usage")
    print("=" * 60)

    # Example 1: Single image processing
    print("\n📖 Example 1: Single Image Processing")
    print("-" * 40)

    # You can replace this with your actual image path
    example_image = "sample_khmer_image.jpg"

    if os.path.exists(example_image):
        results = khmer_ocr_example(example_image)
        if results:
            print("✅ Single image processing completed successfully!")
    else:
        print(f"ℹ️ Example image '{example_image}' not found.")
        print("   Please provide your own Khmer text image.")

    # Example 2: Batch processing
    print("\n📖 Example 2: Batch Processing")
    print("-" * 40)

    sample_dir = "sample_images"
    if os.path.exists(sample_dir):
        batch_process_images(sample_dir)
    else:
        print(f"ℹ️ Sample directory '{sample_dir}' not found.")
        print("   Create a directory with Khmer images to test batch processing.")

    # Example 3: Model info
    print("\n📖 Example 3: Model Information")
    print("-" * 40)

    char_dict_path = 'khmer_char_dict.txt'
    model_files = [
        'inference.pdiparams',
        'inference.yml',
        'inference.json',
        char_dict_path,
    ]

    print("📁 Required model files:")
    for name in model_files:
        if os.path.exists(name):
            size = os.path.getsize(name) / (1024*1024)  # MB
            print(f"  ✅ {name} ({size:.1f}MB)")
        else:
            print(f"  ❌ {name} - Missing!")

    # Load character dictionary info
    if os.path.exists(char_dict_path):
        try:
            with open(char_dict_path, 'r', encoding='utf-8') as f:
                # One entry per line. Only truly empty lines are dropped, so
                # a whitespace entry (e.g. a lone space) is still counted and
                # an empty file yields 0 entries rather than 1.
                chars = [line for line in f.read().splitlines() if line != ""]
            print(f"\n📝 Character Dictionary: {len(chars)} characters supported")
            print(f"   Sample characters: {' '.join(chars[:20])}...")
        except Exception as e:
            print(f"❌ Error reading character dictionary: {e}")

    print("\n🎯 Usage Tips:")
    print("  • Best for 3-5 word text segments")
    print("  • Use high-contrast, clear images")
    print("  • Combine with text detection for full documents")
    print("  • Model supports 188 Khmer and Latin characters")

    print("\n✨ Happy OCR-ing with Khmer text!")

# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()