oneocr / _archive /test_ocr.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""
Test OneOCR Engine — standalone test bez zależności od struktury src/
Uruchamia oneocr.dll na obrazie testowym i weryfikuje wynik OCR.
"""
from __future__ import annotations
import ctypes
import os
import sys
from ctypes import (
POINTER,
Structure,
byref,
c_char_p,
c_float,
c_int32,
c_int64,
c_ubyte,
)
from pathlib import Path
from contextlib import contextmanager
from PIL import Image
# ── Constants (normally imported from src.config.constants) ──
# File name of the OCR engine DLL; joined with the OCR data directory at load time.
OCR_DLL_NAME = "oneocr.dll"
# File name of the model; joined with the same OCR data directory.
OCR_MODEL_NAME = "oneocr.onemodel"
# Key passed to CreateOcrPipeline together with the model path.
OCR_MODEL_KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
# Value handed to OcrProcessOptionsSetMaxRecognitionLineCount.
OCR_MAX_LINES = 1000
# ── C types ──
# Pointer aliases used by the ctypes structure and DLL signatures in this file.
c_int64_p = POINTER(c_int64)
c_float_p = POINTER(c_float)
c_ubyte_p = POINTER(c_ubyte)
class _ImageStructure(Structure):
    """ctypes layout of the image descriptor passed by reference to ``RunOcrPipeline``."""

    _fields_ = [
        # The caller in this file sets 3 while supplying 4-byte BGRA pixels;
        # presumably a pixel-format id — TODO confirm against the DLL.
        ("type", c_int32),
        # Image dimensions in pixels.
        ("width", c_int32),
        ("height", c_int32),
        # Always written as 0 by the caller in this file.
        ("_reserved", c_int32),
        # Bytes per image row (the caller uses width * 4).
        ("step_size", c_int64),
        # Pointer to the first byte of the raw pixel buffer.
        ("data_ptr", c_ubyte_p),
    ]
class _BoundingBox(Structure):
    """Four (x, y) float pairs: ``x1, y1, x2, y2, x3, y3, x4, y4``.

    This is the layout the ``GetOcr*BoundingBox`` signatures point at.
    Presumably the pairs are the corners of a quadrilateral; the corner
    ordering is not established by this file.
    """

    # Generated pairwise so the x/y interleaving is explicit:
    # corner 1 -> ("x1", "y1"), corner 2 -> ("x2", "y2"), ...
    _fields_ = [
        (f"{axis}{corner}", c_float)
        for corner in range(1, 5)
        for axis in ("x", "y")
    ]
# Pointer to a _BoundingBox; the bounding-box getters take a pointer to this pointer.
_BoundingBox_p = POINTER(_BoundingBox)
# Signature table for the oneocr.dll exports used by this script.
# Each entry is (export name, argtypes, restype); run_ocr_test applies these
# to the loaded DLL before calling anything. Handles are plain c_int64 values,
# and every non-release function returns a c_int64 status that the callers in
# this file treat as success when it equals 0.
_DLL_FUNCTIONS = [
    # Init options and pipeline creation.
    ("CreateOcrInitOptions", [c_int64_p], c_int64),
    ("OcrInitOptionsSetUseModelDelayLoad", [c_int64, ctypes.c_char], c_int64),
    ("CreateOcrPipeline", [c_char_p, c_char_p, c_int64, c_int64_p], c_int64),
    # Per-run process options.
    ("CreateOcrProcessOptions", [c_int64_p], c_int64),
    ("OcrProcessOptionsSetMaxRecognitionLineCount", [c_int64, c_int64], c_int64),
    # Running the pipeline and reading the result.
    ("RunOcrPipeline", [c_int64, POINTER(_ImageStructure), c_int64, c_int64_p], c_int64),
    ("GetImageAngle", [c_int64, c_float_p], c_int64),
    # Line accessors.
    ("GetOcrLineCount", [c_int64, c_int64_p], c_int64),
    ("GetOcrLine", [c_int64, c_int64, c_int64_p], c_int64),
    ("GetOcrLineContent", [c_int64, POINTER(c_char_p)], c_int64),
    ("GetOcrLineBoundingBox", [c_int64, POINTER(_BoundingBox_p)], c_int64),
    # Word accessors.
    ("GetOcrLineWordCount", [c_int64, c_int64_p], c_int64),
    ("GetOcrWord", [c_int64, c_int64, c_int64_p], c_int64),
    ("GetOcrWordContent", [c_int64, POINTER(c_char_p)], c_int64),
    ("GetOcrWordBoundingBox", [c_int64, POINTER(_BoundingBox_p)], c_int64),
    ("GetOcrWordConfidence", [c_int64, c_float_p], c_int64),
    # Release functions: declared with no return value (restype None).
    ("ReleaseOcrResult", [c_int64], None),
    ("ReleaseOcrInitOptions", [c_int64], None),
    ("ReleaseOcrPipeline", [c_int64], None),
    ("ReleaseOcrProcessOptions", [c_int64], None),
]
@contextmanager
def _suppress_output():
    """Silence file descriptors 1 and 2 for the duration of the ``with`` block.

    The redirection is done on the OS descriptors (``os.dup2``) rather than on
    ``sys.stdout``/``sys.stderr``, so output written directly to descriptors
    1 and 2 (e.g. by native code) is discarded as well.

    Python's buffered streams are flushed on entry, so text printed earlier
    reaches the real destination, and again before the descriptors are
    restored, so text written inside the block is discarded instead of
    appearing afterwards.

    Every descriptor opened here is closed on exit, including when setting up
    the redirection fails part-way.
    """

    def flush_python_streams() -> None:
        # Best effort: a stream may be None (no console) or already closed.
        for stream in (sys.stdout, sys.stderr):
            if stream is None:
                continue
            try:
                stream.flush()
            except (OSError, ValueError):
                pass

    flush_python_streams()
    opened_fds: list[int] = []
    try:
        devnull = os.open(os.devnull, os.O_WRONLY)
        opened_fds.append(devnull)
        saved_stdout = os.dup(1)
        opened_fds.append(saved_stdout)
        saved_stderr = os.dup(2)
        opened_fds.append(saved_stderr)
        try:
            os.dup2(devnull, 1)
            os.dup2(devnull, 2)
            yield
        finally:
            # Push anything buffered during the block into devnull before
            # pointing the descriptors back at their original targets.
            flush_python_streams()
            os.dup2(saved_stdout, 1)
            os.dup2(saved_stderr, 2)
    finally:
        for fd in opened_fds:
            os.close(fd)
def run_ocr_test(image_path: str, ocr_data_dir: str) -> str:
    """Run the OneOCR pipeline on an image and return the recognized text.

    Loads ``oneocr.dll`` from *ocr_data_dir*, creates the pipeline from the
    model file and key, passes the image as BGRA pixels and collects every
    recognized line. Each recognized word is printed with its confidence.

    Native handles are released even when a step fails, and the image file is
    closed as soon as its pixels have been read.

    Args:
        image_path: Path of the image to recognize (any format Pillow opens).
        ocr_data_dir: Directory containing the DLL and the model file.

    Returns:
        The recognized lines joined with newlines; an empty string when no
        line was recognized.

    Raises:
        FileNotFoundError: If the DLL is not present in *ocr_data_dir*.
        RuntimeError: If a required DLL call returns a non-zero status.
    """

    def check(status: int, api_name: str, hint: str = "") -> None:
        # Explicit raise instead of ``assert`` so the check is not stripped
        # when Python runs with -O.
        if status != 0:
            raise RuntimeError(f"{api_name} failed: {status}{hint}")

    # 1. Load DLL
    kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
    if hasattr(kernel32, "SetDllDirectoryW"):
        # Add the data directory to the DLL search path before loading.
        kernel32.SetDllDirectoryW(ocr_data_dir)
    dll_path = os.path.join(ocr_data_dir, OCR_DLL_NAME)
    if not os.path.exists(dll_path):
        raise FileNotFoundError(f"DLL not found: {dll_path}")
    dll = ctypes.WinDLL(dll_path)
    for name, argtypes, restype in _DLL_FUNCTIONS:
        func = getattr(dll, name)
        func.argtypes = argtypes
        func.restype = restype

    # Handles start at 0; the cleanup below releases only those that were set.
    init_options = c_int64()
    pipeline = c_int64()
    process_options = c_int64()
    ocr_result = c_int64()
    lines: list[str] = []

    try:
        # 2. Initialize pipeline
        check(dll.CreateOcrInitOptions(byref(init_options)), "CreateOcrInitOptions")
        check(
            dll.OcrInitOptionsSetUseModelDelayLoad(init_options, 0),
            "OcrInitOptionsSetUseModelDelayLoad",
        )
        model_path = os.path.join(ocr_data_dir, OCR_MODEL_NAME)
        model_buf = ctypes.create_string_buffer(model_path.encode())
        key_buf = ctypes.create_string_buffer(OCR_MODEL_KEY)
        print(f"Loading model from: {model_path}")
        with _suppress_output():
            status = dll.CreateOcrPipeline(model_buf, key_buf, init_options, byref(pipeline))
        check(status, "CreateOcrPipeline", " (wrong key or corrupted model?)")
        print("Model loaded successfully!")
        check(dll.CreateOcrProcessOptions(byref(process_options)), "CreateOcrProcessOptions")
        check(
            dll.OcrProcessOptionsSetMaxRecognitionLineCount(process_options, OCR_MAX_LINES),
            "OcrProcessOptionsSetMaxRecognitionLineCount",
        )

        # 3. Load and convert image
        # split() reads the pixel data, so the file can be closed right after.
        with Image.open(image_path) as source:
            rgba = source if source.mode == "RGBA" else source.convert("RGBA")
            r, g, b, a = rgba.split()
        # RGB(A) -> BGRA: swap the red and blue channels.
        bgra_image = Image.merge("RGBA", (b, g, r, a))
        width = bgra_image.width
        height = bgra_image.height
        data = bgra_image.tobytes()
        # Mutable copy of the pixels; the structure's pointer field refers to it.
        pixel_buffer = (c_ubyte * len(data)).from_buffer_copy(data)
        img_struct = _ImageStructure(
            type=3,
            width=width,
            height=height,
            _reserved=0,
            step_size=width * 4,  # 4 bytes per pixel
            data_ptr=pixel_buffer,
        )

        # 4. Run OCR
        check(
            dll.RunOcrPipeline(pipeline, byref(img_struct), process_options, byref(ocr_result)),
            "RunOcrPipeline",
        )

        # 5. Parse results
        line_count = c_int64()
        check(dll.GetOcrLineCount(ocr_result, byref(line_count)), "GetOcrLineCount")
        for line_index in range(line_count.value):
            line_handle = c_int64()
            # Per-line failures are skipped rather than aborting the run.
            if dll.GetOcrLine(ocr_result, line_index, byref(line_handle)) != 0:
                continue
            if not line_handle.value:
                continue
            content = c_char_p()
            if dll.GetOcrLineContent(line_handle, byref(content)) == 0 and content.value:
                lines.append(content.value.decode("utf-8", errors="ignore"))

            # Get word details
            word_count = c_int64()
            if dll.GetOcrLineWordCount(line_handle, byref(word_count)) != 0:
                continue
            for word_index in range(word_count.value):
                word_handle = c_int64()
                if dll.GetOcrWord(line_handle, word_index, byref(word_handle)) != 0:
                    continue
                word_content = c_char_p()
                if (
                    dll.GetOcrWordContent(word_handle, byref(word_content)) != 0
                    or not word_content.value
                ):
                    continue
                word_text = word_content.value.decode("utf-8", errors="ignore")
                conf_val = c_float()
                confidence = 0.0
                if dll.GetOcrWordConfidence(word_handle, byref(conf_val)) == 0:
                    confidence = conf_val.value
                print(f" Word: '{word_text}' (confidence: {confidence:.2f})")
    finally:
        # 6. Cleanup — same release order as the success path, and run on
        # failure too so native handles are not leaked.
        for release, handle in (
            (dll.ReleaseOcrResult, ocr_result),
            (dll.ReleaseOcrProcessOptions, process_options),
            (dll.ReleaseOcrPipeline, pipeline),
            (dll.ReleaseOcrInitOptions, init_options),
        ):
            if handle.value:
                release(handle)

    return "\n".join(lines)
def main():
    """Script entry point: make sure a test image exists, run OCR, report.

    ``image.png`` and the ``ocr_data`` directory are looked up next to this
    file. When the image is missing, a simple one with the text
    "ONE OCR DZIALA!" is rendered and saved first. The process exits with
    status 1 if the OCR run raises.
    """
    base_dir = Path(__file__).resolve().parent
    ocr_data_dir = str(base_dir / "ocr_data")
    image_path = str(base_dir / "image.png")
    banner = "=" * 50

    if not os.path.exists(image_path):
        print(f"ERROR: Image not found: {image_path}")
        print("Creating test image...")
        # Create a simple test image
        from PIL import ImageDraw, ImageFont

        canvas = Image.new("RGB", (600, 150), color="white")
        try:
            font = ImageFont.truetype("C:/Windows/Fonts/arial.ttf", 48)
        except OSError:
            # Fall back to Pillow's built-in font when Arial is unavailable.
            font = ImageFont.load_default()
        ImageDraw.Draw(canvas).text((30, 40), "ONE OCR DZIALA!", fill="black", font=font)
        canvas.save(image_path)
        print(f"Created: {image_path}")

    for header_line in (
        banner,
        " OneOCR Test — Standalone Engine Test",
        banner,
        f"OCR Data: {ocr_data_dir}",
        f"Image: {image_path}",
        "",
    ):
        print(header_line)

    try:
        text = run_ocr_test(image_path, ocr_data_dir)
        print()
        print(banner)
        print(f" OCR Result: {text}")
        print(banner)
        print()
        # Case-insensitive check that all expected words were recognized.
        recognized = text.upper()
        if all(word in recognized for word in ("ONE", "OCR", "DZIALA")):
            print("✅ ONE OCR DZIAŁA!")
        else:
            print(f"⚠️ Rozpoznany tekst: '{text}'")
            print(" (może wymagać lepszego obrazu testowego)")
    except Exception as e:
        # Top-level boundary: report the failure with a traceback and exit non-zero.
        print(f"❌ ERROR: {e}")
        import traceback

        traceback.print_exc()
        sys.exit(1)


# Run the test only when executed as a script, not on import.
if __name__ == "__main__":
    main()