File size: 2,831 Bytes
ce847d4
be4a6f1
ce847d4
be4a6f1
 
 
 
ce847d4
 
 
 
be4a6f1
 
 
ce847d4
 
be4a6f1
ce847d4
 
 
 
 
 
be4a6f1
 
 
 
 
 
ce847d4
be4a6f1
 
ce847d4
 
 
be4a6f1
 
ce847d4
 
be4a6f1
 
ce847d4
be4a6f1
 
 
ce847d4
be4a6f1
 
 
 
 
 
ce847d4
be4a6f1
 
 
 
 
ce847d4
be4a6f1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce847d4
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
OneOCR — Cross-platform OCR using Microsoft OneOCR engine.

Available backends (auto-selected):
  1. OcrEngine:        Windows-only DLL wrapper (100% accuracy, fastest)
  2. OcrEngineUnified: Auto-selects best backend (DLL → Wine → ONNX)
  3. OcrEngineOnnx:    Cross-platform ONNX reimplementation (~53% match rate)

Usage:
    python main.py <image_path>
    python main.py                   # uses test.png
    python main.py --backend dll     # force DLL backend
    python main.py --backend wine    # force Wine backend (Linux)
    python main.py --backend onnx    # force ONNX backend
    python main.py -o out.json       # also save results to a JSON file
"""

import argparse
import sys
from pathlib import Path
from PIL import Image


def main():
    """Run OCR on a single image from the command line and print the result.

    Command-line arguments (read from ``sys.argv``):
        image:          Path of the image to recognise (default ``test.png``).
        --backend, -b:  ``dll``, ``wine``, ``onnx`` or ``auto`` (default).
                        ``auto`` passes ``force_backend=None`` to
                        ``OcrEngineUnified``.
        --output, -o:   Optional path; when given, the result is also written
                        there as UTF-8 encoded JSON.

    Raises:
        SystemExit: With status 1 when the image path is not an existing
            file. argparse itself also exits on invalid arguments.
    """
    parser = argparse.ArgumentParser(description="OneOCR — Cross-platform OCR")
    parser.add_argument("image", nargs="?", default="test.png", help="Image path")
    parser.add_argument("--backend", "-b", choices=["dll", "wine", "onnx", "auto"],
                        default="auto", help="OCR backend (default: auto)")
    parser.add_argument("--output", "-o", help="Save results to JSON file")
    args = parser.parse_args()

    # is_file() rather than exists(): a directory would pass exists() and only
    # fail later inside Image.open with a traceback instead of this message.
    if not Path(args.image).is_file():
        print(f"Image not found: {args.image}")
        print("Usage: python main.py <image_path>")
        sys.exit(1)

    # Open the image as a context manager so its file handle is released as
    # soon as recognition has finished.
    with Image.open(args.image) as img:
        print(f"Image: {args.image} ({img.size[0]}x{img.size[1]})")
        print()

        # Use unified engine (auto-selects best backend).
        # Imported here rather than at module level, so argument and path
        # errors above are reported before the engine module is loaded.
        from ocr.engine_unified import OcrEngineUnified

        force = args.backend if args.backend != "auto" else None
        engine = OcrEngineUnified(force_backend=force)
        result = engine.recognize_pil(img)

    print(f"=== Backend: {engine.backend_name.upper()} ===")
    print(f"Text: {result.text}")
    print(f"Lines: {len(result.lines)}, Confidence: {result.average_confidence:.1%}")
    if result.text_angle is not None:
        print(f"Angle: {result.text_angle:.1f}")
    print()

    # One output row per recognised line: each word with its confidence.
    for i, line in enumerate(result.lines):
        words = " | ".join(
            f"{w.text} ({w.confidence:.0%})" for w in line.words
        )
        print(f"  L{i}: {words}")

    # Save JSON if requested
    if args.output:
        import json
        data = {
            "backend": engine.backend_name,
            "text": result.text,
            "text_angle": result.text_angle,
            "lines": [
                {
                    "text": line.text,
                    "words": [
                        {"text": w.text, "confidence": w.confidence}
                        for w in line.words
                    ],
                }
                for line in result.lines
            ],
        }
        # ensure_ascii=False keeps non-ASCII characters verbatim, so the file
        # must be written as UTF-8 explicitly; the platform default encoding
        # (e.g. a Windows code page) may be unable to encode the OCR text.
        Path(args.output).write_text(
            json.dumps(data, indent=2, ensure_ascii=False),
            encoding="utf-8",
        )
        print(f"\nResults saved to {args.output}")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()