#!/usr/bin/env python3
"""
Wine Bridge — Run OneOCR DLL on Linux via Wine subprocess.

Strategy: Use Wine to run a tiny Windows executable that loads the DLL,
processes an image, and returns JSON results via stdout.

This avoids ctypes-over-Wine complexity by letting a native Windows loader
talk to the DLL; this module only spawns it and parses its output.

Architecture:
    Linux Python ──► subprocess (wine) ──► Windows DLL loader ──► JSON stdout

Requirements on Linux:
    - wine (>= 8.0, 64-bit prefix)
    - MinGW-w64 cross-compiler (only needed to build the loader once)

The loader is a minimal C program (see WINE_LOADER_C) that is cross-compiled
to an .exe and run via Wine.
"""

from __future__ import annotations

import json
import os
import platform
import shutil
import struct
import subprocess
import sys
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from PIL import Image


# ─── Wine DLL Loader (C code) ─────────────────────────────────────────────
# This is a self-contained C program that loads oneocr.dll and runs OCR.
# It is compiled once on the target system using x86_64-w64-mingw32-gcc
# (the MinGW cross-compiler; see WineBridge for per-distro install hints).

WINE_LOADER_C = r"""
/* oneocr_loader.c -- Minimal OneOCR DLL loader for Wine
 * Compile: x86_64-w64-mingw32-gcc -O2 -o oneocr_loader.exe oneocr_loader.c
 * Usage:   wine oneocr_loader.exe <dll_dir> <image.bmp> <key_hex>
 * Output:  JSON to stdout (errors: JSON to stderr, exit code 1)
 */
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* DLL function types */
typedef long long (*fn_CreateOcrInitOptions)(long long*);
typedef long long (*fn_OcrInitOptionsSetUseModelDelayLoad)(long long, char);
typedef long long (*fn_CreateOcrPipeline)(const char*, const char*, long long, long long*);
typedef long long (*fn_CreateOcrProcessOptions)(long long*);
typedef long long (*fn_OcrProcessOptionsSetMaxRecognitionLineCount)(long long, long long);
typedef long long (*fn_RunOcrPipeline)(long long, void*, long long, long long*);
typedef long long (*fn_GetImageAngle)(long long, float*);
typedef long long (*fn_GetOcrLineCount)(long long, long long*);
typedef long long (*fn_GetOcrLine)(long long, long long, long long*);
typedef long long (*fn_GetOcrLineContent)(long long, const char**);
typedef long long (*fn_GetOcrLineBoundingBox)(long long, void**);
typedef long long (*fn_GetOcrLineWordCount)(long long, long long*);
typedef long long (*fn_GetOcrWord)(long long, long long, long long*);
typedef long long (*fn_GetOcrWordContent)(long long, const char**);
typedef long long (*fn_GetOcrWordBoundingBox)(long long, void**);
typedef long long (*fn_GetOcrWordConfidence)(long long, float*);
typedef void (*fn_ReleaseOcrResult)(long long);
typedef void (*fn_ReleaseOcrInitOptions)(long long);
typedef void (*fn_ReleaseOcrPipeline)(long long);
typedef void (*fn_ReleaseOcrProcessOptions)(long long);

#pragma pack(push, 1)
typedef struct {
    int type;            /* 3 = BGRA 4-channel (matches engine.py) */
    int width;
    int height;
    int reserved;
    long long step;
    unsigned char *data;
} ImageStruct;

typedef struct {
    float x1, y1, x2, y2, x3, y3, x4, y4;
} BBox;
#pragma pack(pop)

/* Simple BMP loader (24-bit BGR or 32-bit BGRA input, BGRA output).
 * Returns a malloc'ed top-down BGRA buffer, or NULL on any failure. */
static unsigned char* load_bmp_bgra(const char* path, int* w, int* h) {
    FILE* f = fopen(path, "rb");
    if (!f) return NULL;

    unsigned char header[54];
    if (fread(header, 1, 54, f) != 54 || header[0] != 'B' || header[1] != 'M') {
        fclose(f);
        return NULL;
    }

    int width  = *(int*)(header + 18);
    int height = *(int*)(header + 22);   /* negative = rows stored top-down */
    int bpp    = *(short*)(header + 28);
    int offset = *(int*)(header + 10);
    int abs_h  = height < 0 ? -height : height;

    /* Reject anything the conversion loops below cannot handle */
    if (width <= 0 || abs_h <= 0 || (bpp != 24 && bpp != 32)) {
        fclose(f);
        return NULL;
    }

    size_t stride = (size_t)width * 4;
    unsigned char* bgra = (unsigned char*)malloc(stride * (size_t)abs_h);
    if (!bgra) {
        fclose(f);
        return NULL;
    }

    int ok = (fseek(f, offset, SEEK_SET) == 0);

    if (ok && bpp == 24) {
        /* 24-bit rows are padded to a multiple of 4 bytes */
        size_t row_size = ((size_t)width * 3 + 3) & ~(size_t)3;
        unsigned char* row = (unsigned char*)malloc(row_size);
        if (!row) ok = 0;
        for (int y = 0; ok && y < abs_h; y++) {
            int dest_y = (height > 0) ? (abs_h - 1 - y) : y;
            unsigned char* dst = bgra + (size_t)dest_y * stride;
            if (fread(row, 1, row_size, f) != row_size) { ok = 0; break; }
            for (int x = 0; x < width; x++) {
                dst[x * 4 + 0] = row[x * 3 + 0];  /* B */
                dst[x * 4 + 1] = row[x * 3 + 1];  /* G */
                dst[x * 4 + 2] = row[x * 3 + 2];  /* R */
                dst[x * 4 + 3] = 255;             /* A */
            }
        }
        free(row);
    } else if (ok) {
        for (int y = 0; y < abs_h; y++) {
            int dest_y = (height > 0) ? (abs_h - 1 - y) : y;
            if (fread(bgra + (size_t)dest_y * stride, 1, stride, f) != stride) {
                ok = 0;
                break;
            }
        }
    }

    fclose(f);
    if (!ok) {
        free(bgra);
        return NULL;
    }

    *w = width;
    *h = abs_h;
    return bgra;
}

/* Write s to stdout as a JSON string literal.
 * Streaming (instead of filling a fixed buffer) means long text is never
 * truncated and the output size is unbounded. */
static void json_print_string(const char* s) {
    putchar('"');
    for (const unsigned char* p = (const unsigned char*)s; *p; p++) {
        switch (*p) {
            case '"':  fputs("\\\"", stdout); break;
            case '\\': fputs("\\\\", stdout); break;
            case '\n': fputs("\\n", stdout); break;
            case '\r': fputs("\\r", stdout); break;
            case '\t': fputs("\\t", stdout); break;
            default:
                /* Remaining control characters are illegal raw in JSON */
                if (*p < 0x20) printf("\\u%04x", (unsigned int)*p);
                else putchar(*p);
        }
    }
    putchar('"');
}

/* Write a bounding box as an 8-element JSON array (zeros if b is NULL) */
static void json_print_bbox(const BBox* b) {
    static const BBox zero = {0, 0, 0, 0, 0, 0, 0, 0};
    if (!b) b = &zero;
    printf("[%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f]",
           b->x1, b->y1, b->x2, b->y2, b->x3, b->y3, b->x4, b->y4);
}

int main(int argc, char** argv) {
    if (argc < 4) {
        fprintf(stderr, "Usage: %s <dll_dir> <image.bmp> <key_hex>\n", argv[0]);
        return 1;
    }

    const char* dll_dir = argv[1];
    const char* img_path = argv[2];
    const char* key_hex = argv[3];

    /* Set DLL search path */
    SetDllDirectoryA(dll_dir);
    char old_path[32768];
    DWORD path_len = GetEnvironmentVariableA("PATH", old_path, sizeof(old_path));
    if (path_len == 0 || path_len >= sizeof(old_path)) old_path[0] = 0;
    char new_path[32768];
    snprintf(new_path, sizeof(new_path), "%s;%s", dll_dir, old_path);
    SetEnvironmentVariableA("PATH", new_path);

    /* Load DLL */
    char dll_path[MAX_PATH];
    snprintf(dll_path, sizeof(dll_path), "%s\\oneocr.dll", dll_dir);
    HMODULE hmod = LoadLibraryA(dll_path);
    if (!hmod) {
        fprintf(stderr, "{\"error\": \"LoadLibrary failed: %lu\"}\n", GetLastError());
        return 1;
    }

    /* Get function pointers */
    #define GETFN(name) fn_##name p##name = (fn_##name)GetProcAddress(hmod, #name); \
        if (!p##name) { fprintf(stderr, "{\"error\": \"GetProcAddress(%s) failed\"}\n", #name); return 1; }

    GETFN(CreateOcrInitOptions)
    GETFN(OcrInitOptionsSetUseModelDelayLoad)
    GETFN(CreateOcrPipeline)
    GETFN(CreateOcrProcessOptions)
    GETFN(OcrProcessOptionsSetMaxRecognitionLineCount)
    GETFN(RunOcrPipeline)
    GETFN(GetImageAngle)
    GETFN(GetOcrLineCount)
    GETFN(GetOcrLine)
    GETFN(GetOcrLineContent)
    GETFN(GetOcrLineBoundingBox)
    GETFN(GetOcrLineWordCount)
    GETFN(GetOcrWord)
    GETFN(GetOcrWordContent)
    GETFN(GetOcrWordBoundingBox)
    GETFN(GetOcrWordConfidence)
    GETFN(ReleaseOcrResult)
    GETFN(ReleaseOcrInitOptions)
    GETFN(ReleaseOcrPipeline)
    GETFN(ReleaseOcrProcessOptions)

    /* Model path and key */
    char model_path[MAX_PATH];
    snprintf(model_path, sizeof(model_path), "%s\\oneocr.onemodel", dll_dir);

    /* Decode hex key. The length is clamped so the NUL terminator always
     * fits, and each byte is parsed via an unsigned int because the "hh"
     * scanf length modifier is not honoured by every Windows C runtime. */
    char key[64];
    size_t key_len = strlen(key_hex) / 2;
    if (key_len > sizeof(key) - 1) key_len = sizeof(key) - 1;
    memset(key, 0, sizeof(key));
    for (size_t i = 0; i < key_len; i++) {
        unsigned int byte = 0;
        if (sscanf(key_hex + i * 2, "%2x", &byte) != 1) {
            fprintf(stderr, "{\"error\": \"Invalid hex key\"}\n");
            return 1;
        }
        key[i] = (char)byte;
    }

    /* Initialize pipeline */
    long long init_opts = 0;
    pCreateOcrInitOptions(&init_opts);

    long long pipeline = 0;
    long long res = pCreateOcrPipeline(model_path, key, init_opts, &pipeline);
    if (res != 0) {
        fprintf(stderr, "{\"error\": \"CreateOcrPipeline failed: %lld\"}\n", res);
        return 1;
    }

    long long proc_opts = 0;
    pCreateOcrProcessOptions(&proc_opts);
    pOcrProcessOptionsSetMaxRecognitionLineCount(proc_opts, 200);

    /* Load image */
    int w = 0, h = 0;
    unsigned char* data = load_bmp_bgra(img_path, &w, &h);
    if (!data) {
        fprintf(stderr, "{\"error\": \"Failed to load image\"}\n");
        return 1;
    }

    ImageStruct img = {3, w, h, 0, (long long)w * 4, data};

    /* Run OCR */
    long long result = 0;
    res = pRunOcrPipeline(pipeline, &img, proc_opts, &result);
    if (res != 0) {
        fprintf(stderr, "{\"error\": \"RunOcrPipeline failed: %lld\"}\n", res);
        return 1;
    }

    /* Extract results */
    float angle = 0;
    pGetImageAngle(result, &angle);

    long long line_count = 0;
    pGetOcrLineCount(result, &line_count);

    /* Stream JSON to stdout */
    printf("{\"text_angle\": %.4f, \"lines\": [", angle);

    for (long long i = 0; i < line_count; i++) {
        long long line = 0;
        pGetOcrLine(result, i, &line);

        const char* line_text = NULL;
        pGetOcrLineContent(line, &line_text);

        BBox* line_bbox = NULL;
        pGetOcrLineBoundingBox(line, (void**)&line_bbox);

        long long word_count = 0;
        pGetOcrLineWordCount(line, &word_count);

        if (i > 0) fputs(", ", stdout);
        fputs("{\"text\": ", stdout);
        json_print_string(line_text ? line_text : "");
        fputs(", \"bbox\": ", stdout);
        json_print_bbox(line_bbox);
        fputs(", \"words\": [", stdout);

        for (long long j = 0; j < word_count; j++) {
            long long word = 0;
            pGetOcrWord(line, j, &word);

            const char* word_text = NULL;
            pGetOcrWordContent(word, &word_text);

            BBox* word_bbox = NULL;
            pGetOcrWordBoundingBox(word, (void**)&word_bbox);

            float word_conf = 0;
            pGetOcrWordConfidence(word, &word_conf);

            if (j > 0) fputs(", ", stdout);
            fputs("{\"text\": ", stdout);
            json_print_string(word_text ? word_text : "");
            printf(", \"confidence\": %.4f, \"bbox\": ", word_conf);
            json_print_bbox(word_bbox);
            putchar('}');
        }

        fputs("]}", stdout);
    }

    fputs("]}\n", stdout);
    fflush(stdout);

    /* Cleanup */
    pReleaseOcrResult(result);
    free(data);
    pReleaseOcrProcessOptions(proc_opts);
    pReleaseOcrPipeline(pipeline);
    pReleaseOcrInitOptions(init_opts);
    FreeLibrary(hmod);

    return 0;
}
"""


# ─── Python Bridge ─────────────────────────────────────────────────────────

class WineBridge:
    """Bridge to run OneOCR DLL on Linux via Wine.

    Strategy:
        1. Cross-compile a minimal C loader (.exe) using MinGW
        2. Run it via `wine64 oneocr_loader.exe <dll_dir> <image.bmp> <key_hex>`
        3. Parse JSON output

    One-time setup on Linux:
        sudo apt install wine64 mingw-w64     # Debian/Ubuntu
        sudo dnf install wine mingw64-gcc     # Fedora
        sudo pacman -S wine mingw-w64-gcc     # Arch
    """

    def __init__(self, ocr_data_dir: str | Path | None = None):
        """Locate the OCR data, the loader paths and the external tools.

        Args:
            ocr_data_dir: Directory holding oneocr.dll, oneocr.onemodel and
                onnxruntime.dll. Defaults to ``<base>/ocr_data`` where
                ``<base>`` is the parent of this file's directory.
        """
        self._base = Path(__file__).resolve().parent.parent
        self._ocr_data = Path(ocr_data_dir) if ocr_data_dir else self._base / "ocr_data"
        self._loader_exe = self._base / "tools" / "oneocr_loader.exe"
        self._loader_c = self._base / "tools" / "oneocr_loader.c"
        # Passed to the loader hex-encoded on the command line.
        self._model_key = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'

        # Detect external tools (either may be None when not installed)
        self._wine = self._find_wine()
        self._mingw = self._find_mingw()

    @staticmethod
    def _find_wine() -> str | None:
        """Return the path of the first Wine executable on PATH, or None."""
        for name in ("wine64", "wine"):
            path = shutil.which(name)
            if path:
                return path
        return None

    @staticmethod
    def _find_mingw() -> str | None:
        """Return the path of the MinGW-w64 cross-compiler on PATH, or None."""
        for name in ("x86_64-w64-mingw32-gcc", "x86_64-w64-mingw32-gcc-posix"):
            path = shutil.which(name)
            if path:
                return path
        return None

    def check_requirements(self) -> dict[str, bool | str]:
        """Check if all requirements are met.

        Returns:
            A dict of individual checks plus a ``"ready"`` flag. ``"ready"``
            needs Wine, the three data files, and either an already compiled
            loader or MinGW to build one.
        """
        checks = {
            "platform": platform.system(),
            "wine_found": self._wine is not None,
            "wine_path": self._wine or "not found",
            "mingw_found": self._mingw is not None,
            "mingw_path": self._mingw or "not found",
            "dll_exists": (self._ocr_data / "oneocr.dll").exists(),
            "model_exists": (self._ocr_data / "oneocr.onemodel").exists(),
            "onnxruntime_exists": (self._ocr_data / "onnxruntime.dll").exists(),
            "loader_compiled": self._loader_exe.exists(),
        }
        checks["ready"] = all((
            checks["wine_found"],
            checks["dll_exists"],
            checks["model_exists"],
            checks["onnxruntime_exists"],
            checks["loader_compiled"] or checks["mingw_found"],
        ))
        return checks

    def compile_loader(self) -> bool:
        """Cross-compile the C loader using MinGW.

        Returns:
            True if the loader executable exists after compilation.

        Raises:
            RuntimeError: If MinGW is not installed or compilation fails.
        """
        if not self._mingw:
            raise RuntimeError(
                "MinGW cross-compiler not found. Install it:\n"
                "  Ubuntu/Debian: sudo apt install mingw-w64\n"
                "  Fedora: sudo dnf install mingw64-gcc\n"
                "  Arch: sudo pacman -S mingw-w64-gcc"
            )

        # The tools directory may not exist on a fresh checkout.
        self._loader_c.parent.mkdir(parents=True, exist_ok=True)

        # Write C source
        self._loader_c.write_text(WINE_LOADER_C, encoding="utf-8")

        # Compile
        result = subprocess.run(
            [self._mingw, "-O2", "-o", str(self._loader_exe), str(self._loader_c)],
            capture_output=True,
            text=True,
            timeout=30,
        )

        if result.returncode != 0:
            raise RuntimeError(f"Compilation failed:\n{result.stderr}")

        return self._loader_exe.exists()

    def recognize_file(self, image_path: str | Path) -> dict:
        """Run OCR on an image file.

        Args:
            image_path: Path to image (PNG, JPEG, BMP). Non-BMP input is
                converted to a temporary BMP, which needs Pillow.

        Returns:
            Dict with 'text_angle', 'lines' (each with 'text', 'bbox', 'words').

        Raises:
            RuntimeError: If Wine is missing, the loader cannot be compiled,
                or the loader exits with a non-zero status.
        """
        if not self._wine:
            raise RuntimeError(
                "Wine not found. Install it:\n"
                "  Ubuntu/Debian: sudo apt install wine64\n"
                "  Fedora: sudo dnf install wine\n"
                "  Arch: sudo pacman -S wine"
            )

        image_path = Path(image_path)

        if not self._loader_exe.exists():
            self.compile_loader()

        # Convert image to BMP for the C loader
        bmp_path = self._to_bmp(image_path)

        try:
            # Convert paths to Windows format for Wine
            dll_dir = self._to_wine_path(self._ocr_data)
            bmp_wine = self._to_wine_path(bmp_path)
            key_hex = self._model_key.hex()

            # Run via Wine
            cmd = [self._wine, str(self._loader_exe), dll_dir, bmp_wine, key_hex]
            result = subprocess.run(
                cmd,
                capture_output=True,
                # The loader forwards the DLL's strings byte-for-byte;
                # presumably UTF-8, so decode independently of the locale.
                encoding="utf-8",
                errors="replace",
                timeout=60,
                env={**os.environ, "WINEDEBUG": "-all"},  # suppress Wine debug
            )

            if result.returncode != 0:
                raise RuntimeError(f"Wine loader failed:\n{result.stderr}")

            # Parse JSON output
            return json.loads(result.stdout.strip())

        finally:
            # Only remove the BMP if it is a temporary conversion, never the
            # caller's own file.
            if bmp_path != image_path and bmp_path.exists():
                bmp_path.unlink()

    def recognize_pil(self, image: "Image.Image") -> dict:
        """Run OCR on a PIL Image.

        The image is written to a temporary BMP that is removed afterwards,
        including when saving or recognition fails.
        """
        with tempfile.NamedTemporaryFile(suffix=".bmp", delete=False) as f:
            tmp_name = f.name
        try:
            image.convert("RGBA").save(tmp_name, format="BMP")
            return self.recognize_file(tmp_name)
        finally:
            os.unlink(tmp_name)

    @staticmethod
    def _to_bmp(path: Path) -> Path:
        """Convert image to BMP if needed.

        A path that already ends in ``.bmp`` is returned unchanged. Anything
        else is converted into a fresh temporary file, so an unrelated
        ``<name>.bmp`` sitting next to the input is never overwritten (or
        later deleted by the caller's cleanup).
        """
        if path.suffix.lower() == ".bmp":
            return path

        from PIL import Image as PILImage

        fd, tmp_name = tempfile.mkstemp(suffix=".bmp", prefix=f"{path.stem}_")
        os.close(fd)
        bmp_path = Path(tmp_name)
        try:
            with PILImage.open(path) as img:
                img.convert("RGBA").save(bmp_path, format="BMP")
        except Exception:
            # Do not leave a stray temp file behind on a failed conversion.
            if bmp_path.exists():
                bmp_path.unlink()
            raise
        return bmp_path

    @staticmethod
    def _to_wine_path(path: Path) -> str:
        """Convert Unix path to Wine Z: drive path.

        The path is made absolute first; a relative path would otherwise
        yield a drive-relative ``Z:foo`` instead of ``Z:\\...\\foo``.
        """
        return "Z:" + os.path.abspath(str(path)).replace("/", "\\")


# ─── Direct approach: Wine + ctypes (experimental) ─────────────────

class WineCtypesBridge:
    """Alternative: Use Wine's DLL loading directly from Python on Linux.

    This uses a more experimental approach:
    1. Set up Wine prefix with the DLLs
    2. Use ctypes to load DLL through Wine's loader

    This is EXPERIMENTAL and requires:
    - winelib development headers
    - Proper Wine 64-bit prefix
    """
    pass  # TODO: Implement if subprocess approach works


# ─── CLI ───────────────────────────────────────────────────────────

def main() -> int:
    """CLI entry point for testing Wine bridge.

    Returns:
        Process exit status: 0 on success, 1 when a command could not run
        (missing ``--image``, failed compilation, missing test directory).
    """
    import argparse

    parser = argparse.ArgumentParser(description="OneOCR Wine Bridge")
    parser.add_argument("command", choices=["check", "compile", "run", "test"])
    parser.add_argument("--image", "-i", help="Image path for run/test")
    parser.add_argument("--ocr-data", help="Path to ocr_data directory")
    args = parser.parse_args()

    bridge = WineBridge(ocr_data_dir=args.ocr_data)

    if args.command == "check":
        checks = bridge.check_requirements()
        print("Wine Bridge Requirements Check:")
        for k, v in checks.items():
            status = "✅" if v and v != "not found" else "❌"
            print(f"  {status} {k}: {v}")

    elif args.command == "compile":
        try:
            bridge.compile_loader()
            print("✅ Loader compiled successfully")
        except RuntimeError as e:
            print(f"❌ {e}")
            return 1

    elif args.command == "run":
        if not args.image:
            print("Error: --image required for run command")
            return 1
        result = bridge.recognize_file(args.image)
        print(json.dumps(result, indent=2, ensure_ascii=False))

    elif args.command == "test":
        # Run on all test images
        test_dir = Path(__file__).resolve().parent.parent / "working_space" / "input"
        if not test_dir.exists():
            print(f"Test directory not found: {test_dir}")
            return 1
        for img in sorted(test_dir.glob("*.png")):
            # Report per-image failures and keep going with the rest.
            try:
                result = bridge.recognize_file(img)
                lines = result.get("lines", [])
                text = " | ".join(line["text"] for line in lines[:3])
                print(f"  ✅ {img.name}: {text[:80]}...")
            except Exception as e:
                print(f"  ❌ {img.name}: {e}")

    return 0


if __name__ == "__main__":
    sys.exit(main())