"""Profile ONNX engine performance to find bottlenecks.""" import sys, os sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import time import cv2 import numpy as np from ocr.engine_onnx import OcrEngineOnnx engine = OcrEngineOnnx() img = cv2.imread('working_space/input/ocr_test (2).png') img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Warm up (first call loads models) t0 = time.perf_counter() r = engine.recognize_numpy(img_rgb) t1 = time.perf_counter() print(f'First call (cold): {(t1-t0)*1000:.0f}ms') # Profile individual stages h, w = img_rgb.shape[:2] print(f'Image: {w}x{h}') # Time detection t0 = time.perf_counter() quads, scale = engine._detect(img_rgb) t1 = time.perf_counter() print(f'Detection: {(t1-t0)*1000:.1f}ms ({len(quads)} quads)') # Time recognition per quad for i, q in enumerate(quads[:3]): crop = engine._crop_quad(img_rgb, q) if crop is None: continue t0 = time.perf_counter() sid = engine._identify_script(crop) t1 = time.perf_counter() text, conf, _ = engine._recognize(crop, 2) t2 = time.perf_counter() print(f' Quad {i}: ScriptID={t1-t0:.3f}s, Recognize={t2-t1:.3f}s -> "{text}"') # Second call (warm) t0 = time.perf_counter() r = engine.recognize_numpy(img_rgb) t1 = time.perf_counter() print(f'Second call (warm): {(t1-t0)*1000:.0f}ms') # Third call (warm) t0 = time.perf_counter() r = engine.recognize_numpy(img_rgb) t1 = time.perf_counter() print(f'Third call (warm): {(t1-t0)*1000:.0f}ms') # Check session cached count print(f'\nCached sessions: {len(engine._sessions)}') # Check session options sess = list(engine._sessions.values())[0] if engine._sessions else None if sess: print(f'Session providers: {sess.get_providers()}') opts = sess.get_session_options() print(f'Inter-op threads: {opts.inter_op_num_threads}') print(f'Intra-op threads: {opts.intra_op_num_threads}') print(f'Optimization level: {opts.graph_optimization_level}') # Now profile a big image print('\n--- Large image (test.png) ---') img2 = cv2.imread('working_space/input/test.png') img2_rgb = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB) h2, w2 = img2_rgb.shape[:2] print(f'Image: {w2}x{h2}') t0 = time.perf_counter() r2 = engine.recognize_numpy(img2_rgb) t1 = time.perf_counter() print(f'Total: {(t1-t0)*1000:.0f}ms ({len(r2.lines)} lines)') # Time just detection for test.png t0 = time.perf_counter() quads2, scale2 = engine._detect(img2_rgb) t1 = time.perf_counter() print(f'Detection only: {(t1-t0)*1000:.1f}ms ({len(quads2)} quads, scale={scale2:.2f})')