Spaces:

fumiyaaa
/

dokoCame

Sleeping

File size: 5,784 Bytes

319f4cb
 
 
 
 
 
 
 
57bc6ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319f4cb
 
57bc6ef
 
1eff75e
57bc6ef
 
319f4cb
57bc6ef
 
 
 
 
 
 
 
 
 
 
 
 
319f4cb
 
57bc6ef
319f4cb
 
 
57bc6ef
 
319f4cb
57bc6ef
 
 
1eff75e
 
 
57bc6ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319f4cb
 
57bc6ef
319f4cb
57bc6ef
 
 
319f4cb
 
 
 
 
57bc6ef
319f4cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57bc6ef
319f4cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57bc6ef
 
 
 
 
319f4cb
 
57bc6ef

"""
PaddleOCR ラッパー

【重要】PaddleOCR 3.x API変更：
- ocr() → predict() に変更
- use_angle_cls は廃止
- use_doc_orientation_classify, use_doc_unwarping 等を使用
"""

from typing import List, Tuple, Optional
from dataclasses import dataclass
import numpy as np


@dataclass
class OCRResult:
    """A single piece of text found by OCR.

    Attributes are declared in positional order: the recognised string,
    the recognition confidence, and the polygon around the text.
    """

    # Recognised text content.
    text: str
    # Recognition confidence reported for ``text``.
    confidence: float
    # Polygon corner points, laid out as [[x1, y1], [x2, y2], [x3, y3], [x4, y4]].
    bbox: List[List[int]]


class OCREngine:
    """
    Wrapper around PaddleOCR (3.x ``predict()`` API) for text extraction.

    The PaddleOCR object is created lazily on first use, so constructing an
    ``OCREngine`` is cheap and does not require PaddleOCR to be importable.
    If PaddleOCR cannot be imported or initialised, the detection methods
    return empty lists instead of raising.
    """

    # Confidence cut-off used by the filtering helpers unless overridden.
    DEFAULT_MIN_CONFIDENCE = 0.5

    # True once an initialisation attempt has failed. The class-level default
    # keeps the attribute available on instances built without __init__.
    _init_failed = False

    def __init__(self, lang: str = "japan"):
        """
        Args:
            lang: Language setting. It is only stored on the instance; this
                class does not forward it to PaddleOCR.
        """
        self.lang = lang
        self._ocr = None
        self._initialized = False
        self._init_failed = False

    def _init_ocr(self) -> None:
        """Create the PaddleOCR instance on first use.

        A failed attempt is remembered in ``_init_failed`` so that the import
        and model setup are not retried (and the warning not re-printed) on
        every subsequent ``detect()`` / ``is_available`` call.
        """
        if self._initialized or self._init_failed:
            return

        try:
            from paddleocr import PaddleOCR

            # PaddleOCR 3.x constructor; the document-correction stages are
            # switched off for speed.
            self._ocr = PaddleOCR(
                use_doc_orientation_classify=False,
                use_doc_unwarping=False,
                use_textline_orientation=False,
            )
            self._initialized = True
            print("[OCR] PaddleOCR 3.x initialized successfully")
        except ImportError:
            print("Warning: PaddleOCR not installed. OCR will not work.")
            self._init_failed = True
        except Exception as e:
            print(f"Warning: PaddleOCR init error: {e}")
            self._init_failed = True

    @staticmethod
    def _field(item: dict, key: str):
        """Return ``item[key]``, or an empty list when missing or ``None``.

        An explicit ``None`` check is used instead of truthiness because the
        value may be an array-like whose truth value is ambiguous.
        """
        value = item.get(key)
        return [] if value is None else value

    @staticmethod
    def _to_int_points(bbox) -> List[List[int]]:
        """Convert a polygon to plain ``[[x, y], ...]`` Python ints.

        Accepts lists/tuples or array-likes of points. Anything that cannot
        be read as a sequence of (x, y) pairs (including ``None``) yields an
        all-zero four-point box. Float coordinates are truncated by ``int``.
        """
        try:
            return [[int(pt[0]), int(pt[1])] for pt in bbox]
        except (TypeError, ValueError, IndexError):
            return [[0, 0], [0, 0], [0, 0], [0, 0]]

    @staticmethod
    def _center(bbox) -> Tuple[int, int]:
        """Return the integer centroid of the polygon's vertices.

        Coordinates are converted to ``float`` before summing so that
        narrow integer dtypes cannot wrap, and the sum is divided by the
        actual vertex count. An empty polygon gives ``(0, 0)``.
        """
        xs = [float(p[0]) for p in bbox]
        ys = [float(p[1]) for p in bbox]
        if not xs:
            return (0, 0)
        return (int(sum(xs) / len(xs)), int(sum(ys) / len(ys)))

    def _parse_dict_item(self, item: dict) -> List["OCRResult"]:
        """Parse one dict-style (PaddleOCR 3.x) result into ``OCRResult`` objects."""
        texts = self._field(item, "rec_texts")
        scores = self._field(item, "rec_scores")
        # "rec_polys" is the polygon list that lines up index-for-index with
        # "rec_texts"; "dt_polys" is used only when it is absent.
        polys = item.get("rec_polys")
        if polys is None:
            polys = self._field(item, "dt_polys")

        parsed = []
        for i, text in enumerate(texts):
            # Skip empty / whitespace-only recognitions.
            if not text or not text.strip():
                continue
            confidence = float(scores[i]) if i < len(scores) else 0.0
            bbox = polys[i] if i < len(polys) else None
            parsed.append(
                OCRResult(
                    text=text,
                    confidence=confidence,
                    bbox=self._to_int_points(bbox),
                )
            )
        return parsed

    def _parse_legacy_item(self, item) -> List["OCRResult"]:
        """Parse one legacy list/tuple result: entries of ``[bbox, (text, score)]``."""
        parsed = []
        for line in item:
            if not isinstance(line, (list, tuple)) or len(line) < 2:
                continue
            bbox, text_info = line[0], line[1]
            # Require a real (text, score) pair; a bare string here would
            # otherwise be indexed character by character.
            if not isinstance(text_info, (list, tuple)) or len(text_info) < 2:
                continue
            parsed.append(
                OCRResult(
                    text=text_info[0],
                    confidence=float(text_info[1]),
                    bbox=self._to_int_points(bbox),
                )
            )
        return parsed

    def detect(self, frame: np.ndarray) -> List["OCRResult"]:
        """
        Detect text in a frame.

        Args:
            frame: Input image (BGR format).

        Returns:
            A list of ``OCRResult``. The list is empty when the engine is
            unavailable, when nothing is returned by PaddleOCR, or when an
            error occurs while running or parsing the prediction (the error
            and traceback are printed).
        """
        self._init_ocr()

        if self._ocr is None:
            return []

        try:
            # PaddleOCR 3.x entry point.
            result = self._ocr.predict(frame)

            if result is None:
                return []

            ocr_results = []

            # Each item is handled according to its shape: dict-style items
            # carry parallel "rec_*" lists, list/tuple items use the legacy
            # [bbox, (text, score)] layout.
            for item in result:
                if item is None:
                    continue
                if isinstance(item, dict):
                    ocr_results.extend(self._parse_dict_item(item))
                elif isinstance(item, (list, tuple)):
                    ocr_results.extend(self._parse_legacy_item(item))

            return ocr_results

        except Exception as e:
            print(f"OCR error: {e}")
            import traceback
            traceback.print_exc()
            return []

    def detect_text_only(
        self, frame: np.ndarray, min_confidence: float = DEFAULT_MIN_CONFIDENCE
    ) -> List[str]:
        """
        Extract only the text strings, filtered by confidence.

        Args:
            frame: Input image.
            min_confidence: Results with a confidence below this value are
                dropped. Defaults to 0.5.

        Returns:
            The detected text strings, in detection order.
        """
        return [r.text for r in self.detect(frame) if r.confidence >= min_confidence]

    def detect_with_positions(
        self, frame: np.ndarray, min_confidence: float = DEFAULT_MIN_CONFIDENCE
    ) -> List[Tuple[str, float, Tuple[int, int]]]:
        """
        Extract text together with its position.

        Args:
            frame: Input image.
            min_confidence: Results with a confidence below this value are
                dropped. Defaults to 0.5.

        Returns:
            A list of ``(text, confidence, (center_x, center_y))`` tuples,
            where the centre is the mean of the bounding polygon's vertices.
        """
        output = []
        for r in self.detect(frame):
            if r.confidence < min_confidence:
                continue
            output.append((r.text, r.confidence, self._center(r.bbox)))
        return output

    @property
    def is_available(self) -> bool:
        """Whether the OCR engine is usable (triggers lazy initialisation)."""
        self._init_ocr()
        return self._initialized and self._ocr is not None