""" Test OneOCR Engine — standalone test bez zależności od struktury src/ Uruchamia oneocr.dll na obrazie testowym i weryfikuje wynik OCR. """ from __future__ import annotations import ctypes import os import sys from ctypes import ( POINTER, Structure, byref, c_char_p, c_float, c_int32, c_int64, c_ubyte, ) from pathlib import Path from contextlib import contextmanager from PIL import Image # ── Stałe (normalnie z src.config.constants) ── OCR_DLL_NAME = "oneocr.dll" OCR_MODEL_NAME = "oneocr.onemodel" OCR_MODEL_KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4' OCR_MAX_LINES = 1000 # ── Typy C ── c_int64_p = POINTER(c_int64) c_float_p = POINTER(c_float) c_ubyte_p = POINTER(c_ubyte) class _ImageStructure(Structure): _fields_ = [ ("type", c_int32), ("width", c_int32), ("height", c_int32), ("_reserved", c_int32), ("step_size", c_int64), ("data_ptr", c_ubyte_p), ] class _BoundingBox(Structure): _fields_ = [ ("x1", c_float), ("y1", c_float), ("x2", c_float), ("y2", c_float), ("x3", c_float), ("y3", c_float), ("x4", c_float), ("y4", c_float), ] _BoundingBox_p = POINTER(_BoundingBox) _DLL_FUNCTIONS = [ ("CreateOcrInitOptions", [c_int64_p], c_int64), ("OcrInitOptionsSetUseModelDelayLoad", [c_int64, ctypes.c_char], c_int64), ("CreateOcrPipeline", [c_char_p, c_char_p, c_int64, c_int64_p], c_int64), ("CreateOcrProcessOptions", [c_int64_p], c_int64), ("OcrProcessOptionsSetMaxRecognitionLineCount", [c_int64, c_int64], c_int64), ("RunOcrPipeline", [c_int64, POINTER(_ImageStructure), c_int64, c_int64_p], c_int64), ("GetImageAngle", [c_int64, c_float_p], c_int64), ("GetOcrLineCount", [c_int64, c_int64_p], c_int64), ("GetOcrLine", [c_int64, c_int64, c_int64_p], c_int64), ("GetOcrLineContent", [c_int64, POINTER(c_char_p)], c_int64), ("GetOcrLineBoundingBox", [c_int64, POINTER(_BoundingBox_p)], c_int64), ("GetOcrLineWordCount", [c_int64, c_int64_p], c_int64), ("GetOcrWord", [c_int64, c_int64, c_int64_p], c_int64), ("GetOcrWordContent", [c_int64, POINTER(c_char_p)], c_int64), ("GetOcrWordBoundingBox", [c_int64, POINTER(_BoundingBox_p)], c_int64), ("GetOcrWordConfidence", [c_int64, c_float_p], c_int64), ("ReleaseOcrResult", [c_int64], None), ("ReleaseOcrInitOptions", [c_int64], None), ("ReleaseOcrPipeline", [c_int64], None), ("ReleaseOcrProcessOptions", [c_int64], None), ] @contextmanager def _suppress_output(): """Suppress stdout/stderr during DLL initialization.""" devnull = os.open(os.devnull, os.O_WRONLY) original_stdout = os.dup(1) original_stderr = os.dup(2) os.dup2(devnull, 1) os.dup2(devnull, 2) try: yield finally: os.dup2(original_stdout, 1) os.dup2(original_stderr, 2) os.close(original_stdout) os.close(original_stderr) os.close(devnull) def run_ocr_test(image_path: str, ocr_data_dir: str) -> str: """Run OCR on image and return recognized text.""" # 1. Load DLL kernel32 = ctypes.WinDLL("kernel32", use_last_error=True) if hasattr(kernel32, "SetDllDirectoryW"): kernel32.SetDllDirectoryW(ocr_data_dir) dll_path = os.path.join(ocr_data_dir, OCR_DLL_NAME) if not os.path.exists(dll_path): raise FileNotFoundError(f"DLL not found: {dll_path}") dll = ctypes.WinDLL(dll_path) for name, argtypes, restype in _DLL_FUNCTIONS: func = getattr(dll, name) func.argtypes = argtypes func.restype = restype # 2. Initialize pipeline init_options = c_int64() pipeline = c_int64() process_options = c_int64() result = dll.CreateOcrInitOptions(byref(init_options)) assert result == 0, f"CreateOcrInitOptions failed: {result}" result = dll.OcrInitOptionsSetUseModelDelayLoad(init_options, 0) assert result == 0, f"OcrInitOptionsSetUseModelDelayLoad failed: {result}" model_path = os.path.join(ocr_data_dir, OCR_MODEL_NAME) model_buf = ctypes.create_string_buffer(model_path.encode()) key_buf = ctypes.create_string_buffer(OCR_MODEL_KEY) print(f"Loading model from: {model_path}") with _suppress_output(): result = dll.CreateOcrPipeline(model_buf, key_buf, init_options, byref(pipeline)) assert result == 0, f"CreateOcrPipeline failed: {result} (wrong key or corrupted model?)" print("Model loaded successfully!") result = dll.CreateOcrProcessOptions(byref(process_options)) assert result == 0, f"CreateOcrProcessOptions failed: {result}" result = dll.OcrProcessOptionsSetMaxRecognitionLineCount(process_options, OCR_MAX_LINES) assert result == 0, f"OcrProcessOptionsSetMaxRecognitionLineCount failed: {result}" # 3. Load and convert image image = Image.open(image_path) if image.mode != "RGBA": image = image.convert("RGBA") # RGB(A) → BGRA r, g, b, a = image.split() bgra_image = Image.merge("RGBA", (b, g, r, a)) width = bgra_image.width height = bgra_image.height step = width * 4 data = bgra_image.tobytes() data_ptr = (c_ubyte * len(data)).from_buffer_copy(data) img_struct = _ImageStructure( type=3, width=width, height=height, _reserved=0, step_size=step, data_ptr=data_ptr, ) # 4. Run OCR ocr_result = c_int64() result = dll.RunOcrPipeline(pipeline, byref(img_struct), process_options, byref(ocr_result)) assert result == 0, f"RunOcrPipeline failed: {result}" # 5. Parse results line_count = c_int64() result = dll.GetOcrLineCount(ocr_result, byref(line_count)) assert result == 0, f"GetOcrLineCount failed: {result}" lines = [] for i in range(line_count.value): line_handle = c_int64() if dll.GetOcrLine(ocr_result, i, byref(line_handle)) != 0: continue if not line_handle.value: continue content = c_char_p() if dll.GetOcrLineContent(line_handle, byref(content)) == 0 and content.value: line_text = content.value.decode("utf-8", errors="ignore") lines.append(line_text) # Get word details word_count = c_int64() if dll.GetOcrLineWordCount(line_handle, byref(word_count)) == 0: for wi in range(word_count.value): word_handle = c_int64() if dll.GetOcrWord(line_handle, wi, byref(word_handle)) != 0: continue word_content = c_char_p() if dll.GetOcrWordContent(word_handle, byref(word_content)) == 0 and word_content.value: word_text = word_content.value.decode("utf-8", errors="ignore") conf_val = c_float() confidence = 0.0 if dll.GetOcrWordConfidence(word_handle, byref(conf_val)) == 0: confidence = conf_val.value print(f" Word: '{word_text}' (confidence: {confidence:.2f})") # 6. Cleanup dll.ReleaseOcrResult(ocr_result) dll.ReleaseOcrProcessOptions(process_options) dll.ReleaseOcrPipeline(pipeline) dll.ReleaseOcrInitOptions(init_options) full_text = "\n".join(lines) return full_text def main(): project_root = Path(__file__).resolve().parent ocr_data_dir = str(project_root / "ocr_data") image_path = str(project_root / "image.png") if not os.path.exists(image_path): print(f"ERROR: Image not found: {image_path}") print("Creating test image...") # Create a simple test image img = Image.new("RGB", (600, 150), color="white") from PIL import ImageDraw, ImageFont draw = ImageDraw.Draw(img) try: font = ImageFont.truetype("C:/Windows/Fonts/arial.ttf", 48) except OSError: font = ImageFont.load_default() draw.text((30, 40), "ONE OCR DZIALA!", fill="black", font=font) img.save(image_path) print(f"Created: {image_path}") print("=" * 50) print(" OneOCR Test — Standalone Engine Test") print("=" * 50) print(f"OCR Data: {ocr_data_dir}") print(f"Image: {image_path}") print() try: text = run_ocr_test(image_path, ocr_data_dir) print() print("=" * 50) print(f" OCR Result: {text}") print("=" * 50) if "ONE" in text.upper() and "OCR" in text.upper() and "DZIALA" in text.upper(): print() print("✅ ONE OCR DZIAŁA!") else: print() print(f"⚠️ Rozpoznany tekst: '{text}'") print(" (może wymagać lepszego obrazu testowego)") except Exception as e: print(f"❌ ERROR: {e}") import traceback traceback.print_exc() sys.exit(1) if __name__ == "__main__": main()