oneocr / _archive /test_ocr.py

OneOCR Dev

OneOCR - reverse engineering complete, ONNX pipeline 53% match rate

ce847d4 about 16 hours ago

9.12 kB

	"""
	Test OneOCR Engine — standalone test bez zależności od struktury src/
	Uruchamia oneocr.dll na obrazie testowym i weryfikuje wynik OCR.
	"""

	from __future__ import annotations

	import ctypes
	import os
	import sys
	from ctypes import (
	POINTER,
	Structure,
	byref,
	c_char_p,
	c_float,
	c_int32,
	c_int64,
	c_ubyte,
	)
	from pathlib import Path
	from contextlib import contextmanager

	from PIL import Image

	# ── Constants (normally from src.config.constants) ──
	# File name of the OCR DLL; joined with the OCR data directory before loading.
	OCR_DLL_NAME = "oneocr.dll"
	# File name of the model; joined with the OCR data directory and passed to CreateOcrPipeline.
	OCR_MODEL_NAME = "oneocr.onemodel"
	# Key passed to CreateOcrPipeline together with the model path. Must stay byte-for-byte as is.
	OCR_MODEL_KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
	# Value handed to OcrProcessOptionsSetMaxRecognitionLineCount.
	OCR_MAX_LINES = 1000

	# ── C types ──
	# Pointer aliases used in the DLL prototypes and in the image structure below.
	c_int64_p = POINTER(c_int64)
	c_float_p = POINTER(c_float)
	c_ubyte_p = POINTER(c_ubyte)


	class _ImageStructure(Structure):
	    """Image descriptor that is passed by pointer to ``RunOcrPipeline``.

	    The order and C types of the fields define the binary layout seen by
	    the DLL, so fields must not be reordered or resized.
	    """

	    _fields_ = [
	        # Image type tag; the caller in this file passes 3 for its 4-byte-per-pixel
	        # buffer (exact meaning is defined by the DLL — TODO confirm).
	        ("type", c_int32),
	        # Image dimensions in pixels.
	        ("width", c_int32),
	        ("height", c_int32),
	        # Always set to 0 by the caller in this file.
	        ("_reserved", c_int32),
	        # Bytes per image row (the caller uses width * 4).
	        ("step_size", c_int64),
	        # Pointer to the first byte of the pixel buffer.
	        ("data_ptr", c_ubyte_p),
	    ]


	class _BoundingBox(Structure):
	_fields_ = [
	("x1", c_float), ("y1", c_float),
	("x2", c_float), ("y2", c_float),
	("x3", c_float), ("y3", c_float),
	("x4", c_float), ("y4", c_float),
	]


	# Pointer-to-bounding-box type used by the Get*BoundingBox prototypes below.
	_BoundingBox_p = POINTER(_BoundingBox)

	# Prototype table for the functions used from oneocr.dll.
	# Each entry is (exported function name, argtypes, restype); run_ocr_test
	# looks every name up on the loaded DLL and assigns argtypes/restype from here.
	# Functions with a c_int64 restype return a status that callers compare to 0;
	# the Release* functions are declared with no return value.
	_DLL_FUNCTIONS = [
	("CreateOcrInitOptions", [c_int64_p], c_int64),
	("OcrInitOptionsSetUseModelDelayLoad", [c_int64, ctypes.c_char], c_int64),
	("CreateOcrPipeline", [c_char_p, c_char_p, c_int64, c_int64_p], c_int64),
	("CreateOcrProcessOptions", [c_int64_p], c_int64),
	("OcrProcessOptionsSetMaxRecognitionLineCount", [c_int64, c_int64], c_int64),
	("RunOcrPipeline", [c_int64, POINTER(_ImageStructure), c_int64, c_int64_p], c_int64),
	("GetImageAngle", [c_int64, c_float_p], c_int64),
	("GetOcrLineCount", [c_int64, c_int64_p], c_int64),
	("GetOcrLine", [c_int64, c_int64, c_int64_p], c_int64),
	("GetOcrLineContent", [c_int64, POINTER(c_char_p)], c_int64),
	("GetOcrLineBoundingBox", [c_int64, POINTER(_BoundingBox_p)], c_int64),
	("GetOcrLineWordCount", [c_int64, c_int64_p], c_int64),
	("GetOcrWord", [c_int64, c_int64, c_int64_p], c_int64),
	("GetOcrWordContent", [c_int64, POINTER(c_char_p)], c_int64),
	("GetOcrWordBoundingBox", [c_int64, POINTER(_BoundingBox_p)], c_int64),
	("GetOcrWordConfidence", [c_int64, c_float_p], c_int64),
	("ReleaseOcrResult", [c_int64], None),
	("ReleaseOcrInitOptions", [c_int64], None),
	("ReleaseOcrPipeline", [c_int64], None),
	("ReleaseOcrProcessOptions", [c_int64], None),
	]


	@contextmanager
	def _suppress_output():
	"""Suppress stdout/stderr during DLL initialization."""
	devnull = os.open(os.devnull, os.O_WRONLY)
	original_stdout = os.dup(1)
	original_stderr = os.dup(2)
	os.dup2(devnull, 1)
	os.dup2(devnull, 2)
	try:
	yield
	finally:
	os.dup2(original_stdout, 1)
	os.dup2(original_stderr, 2)
	os.close(original_stdout)
	os.close(original_stderr)
	os.close(devnull)


	def run_ocr_test(image_path: str, ocr_data_dir: str) -> str:
	"""Run OCR on image and return recognized text."""

	# 1. Load DLL
	kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
	if hasattr(kernel32, "SetDllDirectoryW"):
	kernel32.SetDllDirectoryW(ocr_data_dir)

	dll_path = os.path.join(ocr_data_dir, OCR_DLL_NAME)
	if not os.path.exists(dll_path):
	raise FileNotFoundError(f"DLL not found: {dll_path}")

	dll = ctypes.WinDLL(dll_path)

	for name, argtypes, restype in _DLL_FUNCTIONS:
	func = getattr(dll, name)
	func.argtypes = argtypes
	func.restype = restype

	# 2. Initialize pipeline
	init_options = c_int64()
	pipeline = c_int64()
	process_options = c_int64()

	result = dll.CreateOcrInitOptions(byref(init_options))
	assert result == 0, f"CreateOcrInitOptions failed: {result}"

	result = dll.OcrInitOptionsSetUseModelDelayLoad(init_options, 0)
	assert result == 0, f"OcrInitOptionsSetUseModelDelayLoad failed: {result}"

	model_path = os.path.join(ocr_data_dir, OCR_MODEL_NAME)
	model_buf = ctypes.create_string_buffer(model_path.encode())
	key_buf = ctypes.create_string_buffer(OCR_MODEL_KEY)

	print(f"Loading model from: {model_path}")
	with _suppress_output():
	result = dll.CreateOcrPipeline(model_buf, key_buf, init_options, byref(pipeline))
	assert result == 0, f"CreateOcrPipeline failed: {result} (wrong key or corrupted model?)"
	print("Model loaded successfully!")

	result = dll.CreateOcrProcessOptions(byref(process_options))
	assert result == 0, f"CreateOcrProcessOptions failed: {result}"

	result = dll.OcrProcessOptionsSetMaxRecognitionLineCount(process_options, OCR_MAX_LINES)
	assert result == 0, f"OcrProcessOptionsSetMaxRecognitionLineCount failed: {result}"

	# 3. Load and convert image
	image = Image.open(image_path)
	if image.mode != "RGBA":
	image = image.convert("RGBA")

	# RGB(A) → BGRA
	r, g, b, a = image.split()
	bgra_image = Image.merge("RGBA", (b, g, r, a))

	width = bgra_image.width
	height = bgra_image.height
	step = width * 4
	data = bgra_image.tobytes()

	data_ptr = (c_ubyte * len(data)).from_buffer_copy(data)

	img_struct = _ImageStructure(
	type=3,
	width=width,
	height=height,
	_reserved=0,
	step_size=step,
	data_ptr=data_ptr,
	)

	# 4. Run OCR
	ocr_result = c_int64()
	result = dll.RunOcrPipeline(pipeline, byref(img_struct), process_options, byref(ocr_result))
	assert result == 0, f"RunOcrPipeline failed: {result}"

	# 5. Parse results
	line_count = c_int64()
	result = dll.GetOcrLineCount(ocr_result, byref(line_count))
	assert result == 0, f"GetOcrLineCount failed: {result}"

	lines = []
	for i in range(line_count.value):
	line_handle = c_int64()
	if dll.GetOcrLine(ocr_result, i, byref(line_handle)) != 0:
	continue
	if not line_handle.value:
	continue

	content = c_char_p()
	if dll.GetOcrLineContent(line_handle, byref(content)) == 0 and content.value:
	line_text = content.value.decode("utf-8", errors="ignore")
	lines.append(line_text)

	# Get word details
	word_count = c_int64()
	if dll.GetOcrLineWordCount(line_handle, byref(word_count)) == 0:
	for wi in range(word_count.value):
	word_handle = c_int64()
	if dll.GetOcrWord(line_handle, wi, byref(word_handle)) != 0:
	continue
	word_content = c_char_p()
	if dll.GetOcrWordContent(word_handle, byref(word_content)) == 0 and word_content.value:
	word_text = word_content.value.decode("utf-8", errors="ignore")
	conf_val = c_float()
	confidence = 0.0
	if dll.GetOcrWordConfidence(word_handle, byref(conf_val)) == 0:
	confidence = conf_val.value
	print(f" Word: '{word_text}' (confidence: {confidence:.2f})")

	# 6. Cleanup
	dll.ReleaseOcrResult(ocr_result)
	dll.ReleaseOcrProcessOptions(process_options)
	dll.ReleaseOcrPipeline(pipeline)
	dll.ReleaseOcrInitOptions(init_options)

	full_text = "\n".join(lines)
	return full_text


	def main():
	    """Run the standalone OCR self-test next to this script.

	    Looks for ``ocr_data`` and ``image.png`` in the script's directory,
	    generates a sample image when none exists, runs the OCR and prints the
	    outcome. Exits with status 1 if the OCR run raises.
	    """
	    script_dir = Path(__file__).resolve().parent
	    ocr_data_dir = str(script_dir / "ocr_data")
	    image_path = str(script_dir / "image.png")
	    banner = "=" * 50

	    if not os.path.exists(image_path):
	        print(f"ERROR: Image not found: {image_path}")
	        print("Creating test image...")
	        # Draw the expected phrase on a white canvas so the check below can pass.
	        canvas = Image.new("RGB", (600, 150), color="white")
	        from PIL import ImageDraw, ImageFont
	        pen = ImageDraw.Draw(canvas)
	        try:
	            font = ImageFont.truetype("C:/Windows/Fonts/arial.ttf", 48)
	        except OSError:
	            # Fall back to Pillow's built-in font when Arial cannot be loaded.
	            font = ImageFont.load_default()
	        pen.text((30, 40), "ONE OCR DZIALA!", fill="black", font=font)
	        canvas.save(image_path)
	        print(f"Created: {image_path}")

	    print(banner)
	    print(" OneOCR Test — Standalone Engine Test")
	    print(banner)
	    print(f"OCR Data: {ocr_data_dir}")
	    print(f"Image: {image_path}")
	    print()

	    try:
	        text = run_ocr_test(image_path, ocr_data_dir)
	        print()
	        print(banner)
	        print(f" OCR Result: {text}")
	        print(banner)

	        # Case-insensitive check that every expected word was recognized.
	        recognized = text.upper()
	        all_found = all(word in recognized for word in ("ONE", "OCR", "DZIALA"))
	        print()
	        if all_found:
	            print("✅ ONE OCR DZIAŁA!")
	        else:
	            print(f"⚠️ Rozpoznany tekst: '{text}'")
	            print(" (może wymagać lepszego obrazu testowego)")

	    except Exception as exc:
	        print(f"❌ ERROR: {exc}")
	        import traceback
	        traceback.print_exc()
	        sys.exit(1)


	# Run the self-test only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()