oneocr

File size: 9,120 Bytes

ce847d4

"""
Test OneOCR Engine — standalone test with no dependency on the src/ layout.
Runs oneocr.dll on a test image and verifies the OCR result.
"""

from __future__ import annotations

import ctypes
import os
import sys
from ctypes import (
    POINTER,
    Structure,
    byref,
    c_char_p,
    c_float,
    c_int32,
    c_int64,
    c_ubyte,
)
from pathlib import Path
from contextlib import contextmanager

from PIL import Image

# ── Constants (normally from src.config.constants) ──
# File names of the OCR library and its model; run_ocr_test() joins each of
# them onto the ocr_data_dir argument.
OCR_DLL_NAME = "oneocr.dll"
OCR_MODEL_NAME = "oneocr.onemodel"
# Passed as the second argument of CreateOcrPipeline, next to the model path.
OCR_MODEL_KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
# Passed to OcrProcessOptionsSetMaxRecognitionLineCount.
OCR_MAX_LINES = 1000

# ── C types ──
# Pointer aliases used by the structure and function declarations below.
c_int64_p = POINTER(c_int64)
c_float_p = POINTER(c_float)
c_ubyte_p = POINTER(c_ubyte)


class _ImageStructure(Structure):
    """Image descriptor handed to ``RunOcrPipeline`` by reference.

    The order and C types of ``_fields_`` define the structure's memory
    layout, so entries must not be reordered or retyped.
    """

    _fields_ = [
        # run_ocr_test() passes 3 together with 4-byte-per-pixel BGRA data;
        # presumably a pixel-format code — TODO confirm against the DLL.
        ("type", c_int32),
        ("width", c_int32),   # image width in pixels
        ("height", c_int32),  # image height in pixels
        ("_reserved", c_int32),  # always set to 0 by run_ocr_test()
        ("step_size", c_int64),  # bytes per row; run_ocr_test() uses width * 4
        ("data_ptr", c_ubyte_p),  # pointer to the first byte of pixel data
    ]


class _BoundingBox(Structure):
    _fields_ = [
        ("x1", c_float), ("y1", c_float),
        ("x2", c_float), ("y2", c_float),
        ("x3", c_float), ("y3", c_float),
        ("x4", c_float), ("y4", c_float),
    ]


# Pointer to a bounding box; the Get*BoundingBox entries below take a pointer
# to this pointer as their output argument.
_BoundingBox_p = POINTER(_BoundingBox)

# Signature table for the oneocr.dll exports used by this script, written as
# (export name, argtypes, restype) tuples. run_ocr_test() applies every entry
# to the loaded DLL before making any call. Handles are declared as plain
# c_int64 values, output arguments as pointers, and the Release* entries
# declare no return value (restype None).
_DLL_FUNCTIONS = [
    ("CreateOcrInitOptions", [c_int64_p], c_int64),
    ("OcrInitOptionsSetUseModelDelayLoad", [c_int64, ctypes.c_char], c_int64),
    ("CreateOcrPipeline", [c_char_p, c_char_p, c_int64, c_int64_p], c_int64),
    ("CreateOcrProcessOptions", [c_int64_p], c_int64),
    ("OcrProcessOptionsSetMaxRecognitionLineCount", [c_int64, c_int64], c_int64),
    ("RunOcrPipeline", [c_int64, POINTER(_ImageStructure), c_int64, c_int64_p], c_int64),
    ("GetImageAngle", [c_int64, c_float_p], c_int64),
    ("GetOcrLineCount", [c_int64, c_int64_p], c_int64),
    ("GetOcrLine", [c_int64, c_int64, c_int64_p], c_int64),
    ("GetOcrLineContent", [c_int64, POINTER(c_char_p)], c_int64),
    ("GetOcrLineBoundingBox", [c_int64, POINTER(_BoundingBox_p)], c_int64),
    ("GetOcrLineWordCount", [c_int64, c_int64_p], c_int64),
    ("GetOcrWord", [c_int64, c_int64, c_int64_p], c_int64),
    ("GetOcrWordContent", [c_int64, POINTER(c_char_p)], c_int64),
    ("GetOcrWordBoundingBox", [c_int64, POINTER(_BoundingBox_p)], c_int64),
    ("GetOcrWordConfidence", [c_int64, c_float_p], c_int64),
    ("ReleaseOcrResult", [c_int64], None),
    ("ReleaseOcrInitOptions", [c_int64], None),
    ("ReleaseOcrPipeline", [c_int64], None),
    ("ReleaseOcrProcessOptions", [c_int64], None),
]


@contextmanager
def _suppress_output():
    """Suppress stdout/stderr during DLL initialization."""
    devnull = os.open(os.devnull, os.O_WRONLY)
    original_stdout = os.dup(1)
    original_stderr = os.dup(2)
    os.dup2(devnull, 1)
    os.dup2(devnull, 2)
    try:
        yield
    finally:
        os.dup2(original_stdout, 1)
        os.dup2(original_stderr, 2)
        os.close(original_stdout)
        os.close(original_stderr)
        os.close(devnull)


def _require_ok(status, api_name, hint=""):
    """Raise ``RuntimeError`` when a OneOCR call returned a non-zero status."""
    if status != 0:
        raise RuntimeError(f"{api_name} failed: {status}{hint}")


def _load_bgra_pixels(image_path):
    """Return ``(width, height, pixel_bytes)`` with channels in B, G, R, A order."""
    # The context manager closes the underlying image file once the pixels
    # have been copied out.
    with Image.open(image_path) as source:
        rgba = source if source.mode == "RGBA" else source.convert("RGBA")
        red, green, blue, alpha = rgba.split()
        # Swapping the red and blue bands turns the raw byte order into BGRA.
        swapped = Image.merge("RGBA", (blue, green, red, alpha))
        return swapped.width, swapped.height, swapped.tobytes()


def _read_lines(dll, ocr_result):
    """Return the text of every recognized line, printing each word's confidence."""
    line_count = c_int64()
    _require_ok(dll.GetOcrLineCount(ocr_result, byref(line_count)), "GetOcrLineCount")

    lines = []
    for line_index in range(line_count.value):
        line_handle = c_int64()
        if dll.GetOcrLine(ocr_result, line_index, byref(line_handle)) != 0:
            continue
        if not line_handle.value:
            continue

        content = c_char_p()
        if dll.GetOcrLineContent(line_handle, byref(content)) != 0 or not content.value:
            continue
        lines.append(content.value.decode("utf-8", errors="ignore"))

        # Word details are diagnostic only; failures here never drop the line.
        word_count = c_int64()
        if dll.GetOcrLineWordCount(line_handle, byref(word_count)) != 0:
            continue
        for word_index in range(word_count.value):
            word_handle = c_int64()
            if dll.GetOcrWord(line_handle, word_index, byref(word_handle)) != 0:
                continue
            if not word_handle.value:
                # Same null-handle guard as applied to line handles above.
                continue
            word_content = c_char_p()
            if dll.GetOcrWordContent(word_handle, byref(word_content)) != 0:
                continue
            if not word_content.value:
                continue
            word_text = word_content.value.decode("utf-8", errors="ignore")
            conf_val = c_float()
            confidence = 0.0
            if dll.GetOcrWordConfidence(word_handle, byref(conf_val)) == 0:
                confidence = conf_val.value
            print(f"  Word: '{word_text}' (confidence: {confidence:.2f})")
    return lines


def run_ocr_test(image_path: str, ocr_data_dir: str) -> str:
    """Run the OneOCR pipeline on one image and return the recognized text.

    Loads ``oneocr.dll`` from ``ocr_data_dir``, creates the pipeline from the
    model file in the same directory, feeds it the image as BGRA pixels and
    collects the recognized lines. Per-word text and confidence are printed
    to stdout as a side effect.

    Args:
        image_path: Path of an image file that Pillow can open.
        ocr_data_dir: Directory holding the OCR DLL and the model file.

    Returns:
        The recognized lines joined with newlines (empty string if none).

    Raises:
        FileNotFoundError: If the DLL or the model file is missing.
        RuntimeError: If a OneOCR API call reports a non-zero status. Explicit
            raises are used instead of ``assert`` so the checks still run
            under ``python -O``.
    """
    # 1. Load DLL
    # Verify the files first so the process-wide DLL search path is not
    # modified when there is nothing to load.
    dll_path = os.path.join(ocr_data_dir, OCR_DLL_NAME)
    if not os.path.exists(dll_path):
        raise FileNotFoundError(f"DLL not found: {dll_path}")
    model_path = os.path.join(ocr_data_dir, OCR_MODEL_NAME)
    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model not found: {model_path}")

    kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
    if hasattr(kernel32, "SetDllDirectoryW"):
        # Adds ocr_data_dir to the DLL search path before oneocr.dll is
        # loaded; presumably so its dependent DLLs resolve — TODO confirm.
        kernel32.SetDllDirectoryW(ocr_data_dir)

    dll = ctypes.WinDLL(dll_path)
    for name, argtypes, restype in _DLL_FUNCTIONS:
        func = getattr(dll, name)
        func.argtypes = argtypes
        func.restype = restype

    init_options = c_int64()
    pipeline = c_int64()
    process_options = c_int64()
    ocr_result = c_int64()
    try:
        # 2. Initialize pipeline
        _require_ok(dll.CreateOcrInitOptions(byref(init_options)), "CreateOcrInitOptions")
        _require_ok(
            dll.OcrInitOptionsSetUseModelDelayLoad(init_options, 0),
            "OcrInitOptionsSetUseModelDelayLoad",
        )

        model_buf = ctypes.create_string_buffer(model_path.encode())
        key_buf = ctypes.create_string_buffer(OCR_MODEL_KEY)

        print(f"Loading model from: {model_path}")
        with _suppress_output():
            status = dll.CreateOcrPipeline(model_buf, key_buf, init_options, byref(pipeline))
        _require_ok(status, "CreateOcrPipeline", " (wrong key or corrupted model?)")
        print("Model loaded successfully!")

        _require_ok(
            dll.CreateOcrProcessOptions(byref(process_options)),
            "CreateOcrProcessOptions",
        )
        _require_ok(
            dll.OcrProcessOptionsSetMaxRecognitionLineCount(process_options, OCR_MAX_LINES),
            "OcrProcessOptionsSetMaxRecognitionLineCount",
        )

        # 3. Load and convert image
        width, height, pixels = _load_bgra_pixels(image_path)
        # The ctypes array owns a copy of the pixels; the structure keeps it
        # referenced for as long as img_struct is alive.
        pixel_buffer = (c_ubyte * len(pixels)).from_buffer_copy(pixels)
        img_struct = _ImageStructure(
            type=3,
            width=width,
            height=height,
            _reserved=0,
            step_size=width * 4,  # 4 bytes per BGRA pixel
            data_ptr=pixel_buffer,
        )

        # 4. Run OCR
        _require_ok(
            dll.RunOcrPipeline(pipeline, byref(img_struct), process_options, byref(ocr_result)),
            "RunOcrPipeline",
        )

        # 5. Parse results
        lines = _read_lines(dll, ocr_result)
    finally:
        # 6. Cleanup — runs on failure too, so native objects never leak.
        # Handles still at 0 were never created and are skipped (assumes 0 is
        # never a valid handle, matching the null check on line handles).
        release_plan = (
            (dll.ReleaseOcrResult, ocr_result),
            (dll.ReleaseOcrProcessOptions, process_options),
            (dll.ReleaseOcrPipeline, pipeline),
            (dll.ReleaseOcrInitOptions, init_options),
        )
        for release, handle in release_plan:
            if handle.value:
                release(handle)

    return "\n".join(lines)


def main():
    """Run the standalone OneOCR smoke test next to this script.

    Uses ``ocr_data/`` and ``image.png`` from the script's directory. When the
    image is missing, a white test image with the text "ONE OCR DZIALA!" is
    generated first. The OCR result is printed, and the process exits with
    status 1 if the OCR run raises.
    """
    base_dir = Path(__file__).resolve().parent
    ocr_data_dir = str(base_dir / "ocr_data")
    image_path = str(base_dir / "image.png")

    if not os.path.exists(image_path):
        print(f"ERROR: Image not found: {image_path}")
        print("Creating test image...")
        # Build a simple white canvas with the expected phrase drawn on it.
        canvas = Image.new("RGB", (600, 150), color="white")
        from PIL import ImageDraw, ImageFont

        draw = ImageDraw.Draw(canvas)
        try:
            font = ImageFont.truetype("C:/Windows/Fonts/arial.ttf", 48)
        except OSError:
            # Arial is unavailable; fall back to Pillow's built-in font.
            font = ImageFont.load_default()
        draw.text((30, 40), "ONE OCR DZIALA!", fill="black", font=font)
        canvas.save(image_path)
        print(f"Created: {image_path}")

    banner = "=" * 50
    print(banner)
    print("  OneOCR Test — Standalone Engine Test")
    print(banner)
    print(f"OCR Data: {ocr_data_dir}")
    print(f"Image:    {image_path}")
    print()

    try:
        text = run_ocr_test(image_path, ocr_data_dir)
        print()
        print(banner)
        print(f"  OCR Result: {text}")
        print(banner)

        # Case-insensitive check that every expected token was recognized.
        upper_text = text.upper()
        all_found = all(token in upper_text for token in ("ONE", "OCR", "DZIALA"))
        print()
        if all_found:
            print("✅ ONE OCR DZIAŁA!")
        else:
            print(f"⚠️  Rozpoznany tekst: '{text}'")
            print("   (może wymagać lepszego obrazu testowego)")

    except Exception as e:
        # Top-level boundary: report the failure with a traceback and exit 1.
        print(f"❌ ERROR: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)


# Run the smoke test only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()