oneocr / _archive /temp /profile_engine.py

OneOCR Dev

OneOCR - reverse engineering complete, ONNX pipeline 53% match rate

ce847d4 about 19 hours ago

2.57 kB

	"""Profile ONNX engine performance to find bottlenecks."""
	import sys, os
	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
	import time
	import cv2
	import numpy as np

	from ocr.engine_onnx import OcrEngineOnnx

	# Build the ONNX-backed OCR engine that the rest of this script profiles.
	engine = OcrEngineOnnx()

	# Load the small test image. cv2.imread returns None (it does not raise)
	# when the file is missing or cannot be decoded, so fail here with a clear
	# message instead of letting cvtColor raise an opaque error further down.
	img_path = 'working_space/input/ocr_test (2).png'
	img = cv2.imread(img_path)
	if img is None:
	    raise FileNotFoundError(
	        f'Could not read image {img_path!r} '
	        '(the path is relative to the current working directory)'
	    )
	# OpenCV decodes to BGR channel order; convert to RGB before use.
	img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

	# Cold run: this is the first call, which also pays for model loading.
	cold_start = time.perf_counter()
	r = engine.recognize_numpy(img_rgb)
	cold_ms = (time.perf_counter() - cold_start) * 1000
	print(f'First call (cold): {cold_ms:.0f}ms')

	# Report the input size; the array shape is (rows, cols, ...), printed
	# here as width x height.
	h, w = img_rgb.shape[:2]
	print(f'Image: {w}x{h}')

	# Detection stage, timed on its own.
	detect_start = time.perf_counter()
	quads, scale = engine._detect(img_rgb)
	detect_ms = (time.perf_counter() - detect_start) * 1000
	print(f'Detection: {detect_ms:.1f}ms ({len(quads)} quads)')

	# Time script identification and recognition separately for at most the
	# first three detected quads. These timings are printed in seconds.
	for i, q in enumerate(quads[:3]):
	    crop = engine._crop_quad(img_rgb, q)
	    if crop is None:
	        # No crop was produced for this quad, so there is nothing to time.
	        continue
	    t0 = time.perf_counter()
	    sid = engine._identify_script(crop)
	    t1 = time.perf_counter()
	    # NOTE(review): recognition is called with a hard-coded 2 rather than
	    # the `sid` computed above -- confirm this is intentional.
	    text, conf, _ = engine._recognize(crop, 2)
	    t2 = time.perf_counter()
	    print(f' Quad {i}: ScriptID={t1-t0:.3f}s, Recognize={t2-t1:.3f}s -> "{text}"')

	# Run the full pipeline twice more on the same image and report each
	# elapsed time in milliseconds.
	for warm_label in ('Second call (warm)', 'Third call (warm)'):
	    warm_start = time.perf_counter()
	    r = engine.recognize_numpy(img_rgb)
	    warm_ms = (time.perf_counter() - warm_start) * 1000
	    print(f'{warm_label}: {warm_ms:.0f}ms')

	# Report how many sessions the engine currently holds in its cache.
	print(f'\nCached sessions: {len(engine._sessions)}')

	# Inspect the configuration of one cached session (the first in iteration
	# order), if there is one. next(iter(...), None) avoids building a
	# throwaway list just to take its first element.
	# NOTE(review): assumes engine._sessions is a dict-like mapping -- confirm.
	sess = next(iter(engine._sessions.values()), None)
	if sess is not None:
	    print(f'Session providers: {sess.get_providers()}')
	    opts = sess.get_session_options()
	    print(f'Inter-op threads: {opts.inter_op_num_threads}')
	    print(f'Intra-op threads: {opts.intra_op_num_threads}')
	    print(f'Optimization level: {opts.graph_optimization_level}')

	# Now profile a big image
	print('\n--- Large image (test.png) ---')
	# cv2.imread returns None (it does not raise) when the file is missing or
	# cannot be decoded, so check before handing the result to cvtColor.
	img2_path = 'working_space/input/test.png'
	img2 = cv2.imread(img2_path)
	if img2 is None:
	    raise FileNotFoundError(
	        f'Could not read image {img2_path!r} '
	        '(the path is relative to the current working directory)'
	    )
	# OpenCV decodes to BGR channel order; convert to RGB before use.
	img2_rgb = cv2.cvtColor(img2, cv2.COLOR_BGR2RGB)
	h2, w2 = img2_rgb.shape[:2]
	print(f'Image: {w2}x{h2}')

	# Full pipeline on the large image.
	t0 = time.perf_counter()
	r2 = engine.recognize_numpy(img2_rgb)
	t1 = time.perf_counter()
	print(f'Total: {(t1-t0)*1000:.0f}ms ({len(r2.lines)} lines)')

	# Detection stage alone on the same image, for comparison with the total.
	t0 = time.perf_counter()
	quads2, scale2 = engine._detect(img2_rgb)
	t1 = time.perf_counter()
	print(f'Detection only: {(t1-t0)*1000:.1f}ms ({len(quads2)} quads, scale={scale2:.2f})')