"""
PaddleOCR ラッパー

【重要】PaddleOCR 3.x API変更：
- ocr() → predict() に変更
- use_angle_cls は廃止
- use_doc_orientation_classify, use_doc_unwarping 等を使用
"""

from typing import List, Tuple, Optional
from dataclasses import dataclass
import numpy as np


@dataclass
class OCRResult:
    """One recognized text region.

    Attributes:
        text: The recognized string.
        confidence: Recognition score reported by the OCR engine.
        bbox: Corner points of the region as ``[x, y]`` pairs, i.e.
            ``[[x1, y1], [x2, y2], [x3, y3], [x4, y4]]``.
    """

    text: str
    confidence: float
    bbox: List[List[int]]


class OCREngine:
    """
    Thin wrapper around the PaddleOCR 3.x pipeline (Japanese by default).

    The underlying PaddleOCR object is created lazily on first use, so
    constructing an ``OCREngine`` is cheap and never raises. Initialization
    is attempted exactly once; if it fails, a warning is printed, the engine
    stays unavailable and every detection call returns an empty list.
    """

    def __init__(self, lang: str = "japan"):
        """
        Args:
            lang: PaddleOCR language code, forwarded to the PaddleOCR
                constructor when the engine is first used.
        """
        self.lang = lang
        self._ocr = None
        self._initialized = False
        # Set once the first initialization attempt has run (success or not)
        # so a broken install is not re-imported on every frame.
        self._init_attempted = False

    def _init_ocr(self) -> None:
        """Create the PaddleOCR pipeline on first call; later calls are no-ops."""
        if self._init_attempted:
            return
        self._init_attempted = True

        try:
            from paddleocr import PaddleOCR

            # Document-level preprocessing is switched off for speed.
            self._ocr = PaddleOCR(
                lang=self.lang,
                use_doc_orientation_classify=False,
                use_doc_unwarping=False,
                use_textline_orientation=False,
            )
            self._initialized = True
            print("[OCR] PaddleOCR 3.x initialized successfully")
        except ImportError:
            print("Warning: PaddleOCR not installed. OCR will not work.")
            self._ocr = None
            self._initialized = False
        except Exception as e:
            # Best effort: any other failure leaves the engine disabled.
            print(f"Warning: PaddleOCR init error: {e}")
            self._ocr = None
            self._initialized = False

    @staticmethod
    def _to_bbox(poly) -> List[List[int]]:
        """Convert a polygon (numpy array or nested sequence) to ``[[x, y], ...]`` of ints.

        Returns an all-zero four-point box when the polygon is missing or
        cannot be converted.
        """
        if poly is not None:
            try:
                return [[int(round(float(p[0]))), int(round(float(p[1])))] for p in poly]
            except (TypeError, ValueError, IndexError, OverflowError):
                pass
        return [[0, 0] for _ in range(4)]

    @staticmethod
    def _bbox_center(bbox) -> Tuple[int, int]:
        """Return the mean of the polygon's points, truncated to ints.

        An empty or missing polygon yields ``(0, 0)``.
        """
        count = 0 if bbox is None else len(bbox)
        if count == 0:
            return (0, 0)
        # float() avoids overflow when the points are small numpy integers.
        total_x = sum(float(p[0]) for p in bbox)
        total_y = sum(float(p[1]) for p in bbox)
        return (int(total_x / count), int(total_y / count))

    @classmethod
    def _parse_dict_item(cls, item) -> "List[OCRResult]":
        """Parse one dict-style (PaddleOCR 3.x) page result."""
        texts = item.get("rec_texts")
        if texts is None:
            return []
        scores = item.get("rec_scores")
        if scores is None:
            scores = []
        # rec_polys is the polygon list aligned with rec_texts; dt_polys is
        # the raw detector output and is only used when rec_polys is absent.
        polys = item.get("rec_polys")
        if polys is None:
            polys = item.get("dt_polys")
        if polys is None:
            polys = []

        parsed = []
        for i, text in enumerate(texts):
            if not isinstance(text, str) or not text.strip():
                continue
            score = scores[i] if i < len(scores) else 0.0
            poly = polys[i] if i < len(polys) else None
            parsed.append(
                OCRResult(
                    text=text,
                    confidence=float(score),
                    bbox=cls._to_bbox(poly),
                )
            )
        return parsed

    @classmethod
    def _parse_legacy_item(cls, item) -> "List[OCRResult]":
        """Parse one legacy page result: a sequence of ``[bbox, (text, score)]`` lines."""
        parsed = []
        for line in item:
            if not isinstance(line, (list, tuple)) or len(line) < 2:
                continue
            poly, text_info = line[0], line[1]
            if not text_info or len(text_info) < 2:
                continue
            text = text_info[0]
            # Skip blank text, matching the dict-style branch.
            if not isinstance(text, str) or not text.strip():
                continue
            parsed.append(
                OCRResult(
                    text=text,
                    confidence=float(text_info[1]),
                    bbox=cls._to_bbox(poly),
                )
            )
        return parsed

    def detect(self, frame: np.ndarray) -> "List[OCRResult]":
        """
        Run OCR on a frame and return every non-blank text region.

        Args:
            frame: Input image (BGR).

        Returns:
            A list of OCRResult. Empty when the engine is unavailable, when
            nothing is recognized, or when prediction raises (the error is
            printed with its traceback instead of propagating).
        """
        self._init_ocr()

        if self._ocr is None:
            return []

        try:
            raw = self._ocr.predict(frame)
            if raw is None:
                return []

            found = []
            # predict() may hand back a generator, so iteration stays inside
            # the try block.
            for item in raw:
                if item is None:
                    continue
                if isinstance(item, dict):
                    found.extend(self._parse_dict_item(item))
                elif isinstance(item, (list, tuple)):
                    found.extend(self._parse_legacy_item(item))
            return found

        except Exception as e:
            print(f"OCR error: {e}")
            import traceback
            traceback.print_exc()
            return []

    def detect_text_only(
        self, frame: np.ndarray, *, min_confidence: float = 0.5
    ) -> List[str]:
        """
        Return only the recognized strings, filtered by confidence.

        Args:
            frame: Input image.
            min_confidence: Results scoring below this value are dropped.

        Returns:
            Texts whose confidence is at least ``min_confidence``.
        """
        return [r.text for r in self.detect(frame) if r.confidence >= min_confidence]

    def detect_with_positions(
        self, frame: np.ndarray, *, min_confidence: float = 0.5
    ) -> List[Tuple[str, float, Tuple[int, int]]]:
        """
        Return recognized text together with the centre of its bounding polygon.

        Args:
            frame: Input image.
            min_confidence: Results scoring below this value are dropped.

        Returns:
            A list of ``(text, confidence, (center_x, center_y))`` tuples.
        """
        return [
            (r.text, r.confidence, self._bbox_center(r.bbox))
            for r in self.detect(frame)
            if r.confidence >= min_confidence
        ]

    @property
    def is_available(self) -> bool:
        """Whether the OCR engine is usable (triggers lazy initialization)."""
        self._init_ocr()
        return self._initialized and self._ocr is not None