Spaces:

Tejveer12
/

IIIT-5K

Sleeping

IIIT-5K / app.py

Update app.py

6204233 verified 3 months ago

1.66 kB

	import cv2
	import numpy as np
	import gradio as gr
	import onnxruntime as ort

	# Path of the exported ONNX model; the session is created once at import time.
	MODEL_PATH = "crnn.onnx"
	session = ort.InferenceSession(MODEL_PATH)

	# Class index -> character lookup. Index 0 (the leading space) is the CTC blank.
	alphabet = ' ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'


	def preprocess(image):
	    """Convert an uploaded image into the CRNN input tensor.

	    Args:
	        image: ``numpy`` image array. Colour input is treated as RGB or
	            RGBA, which is the channel order Gradio's ``Image`` component
	            delivers (not OpenCV's BGR). 2-D and single-channel arrays
	            are taken as already grayscale.
	            NOTE(review): pixel values are assumed to be 8-bit (0-255);
	            confirm if the input component is ever reconfigured.

	    Returns:
	        ``float32`` array of shape ``[1, 1, 32, 100]`` scaled by 1/255.
	    """
	    # Pick the grayscale conversion that matches the channel layout.
	    if image.ndim == 2:
	        gray = image
	    elif image.shape[2] == 1:
	        gray = image[:, :, 0]
	    elif image.shape[2] == 4:
	        gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
	    else:
	        # RGB, not BGR: using the BGR code would swap the red/blue weights.
	        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

	    # cv2.resize takes (width, height): width 100, height 32.
	    img = cv2.resize(gray, (100, 32))
	    # Scale to [0, 1] (same range as the ToTensor() scaling noted for training).
	    img = img.astype(np.float32) / 255.0
	    # Add batch and channel dimensions: [1, 1, 32, 100]
	    return img[np.newaxis, np.newaxis, :, :]


	def ctc_decode(preds, charset=None):
	    """Greedy CTC decoding of the network output into a string.

	    The most likely class is taken at every width step; blanks (class 0)
	    are dropped and runs of the same class collapse to one character.

	    Args:
	        preds: Array of shape ``[width_steps, 1, num_classes]`` with the
	            per-step class scores.
	        charset: Optional class-index -> character lookup whose index 0 is
	            the CTC blank. Defaults to the module-level ``alphabet``.

	    Returns:
	        The decoded text; an empty string when every step is blank or
	        there are no steps.

	    Raises:
	        ValueError: If the batch dimension (axis 1) is not of size 1.
	        IndexError: If a predicted class index has no entry in ``charset``.
	    """
	    if charset is None:
	        charset = alphabet

	    # Best class per step, batch axis removed -> plain list of ints.
	    steps = np.argmax(preds, axis=2).squeeze(1).tolist()

	    # Pair each step with its predecessor; seeding with the blank (0) means
	    # the first step is kept whenever it is not blank.
	    return "".join(
	        charset[cur]
	        for prev, cur in zip([0, *steps], steps)
	        if cur != 0 and cur != prev
	    )


	def inference(image):
	    """Run OCR on an uploaded image and return the recognized text.

	    Returns a prompt string instead when no image was supplied.
	    """
	    if image is not None:
	        batch = preprocess(image)
	        # First (only used) output of the ONNX session holds the predictions.
	        outputs = session.run(None, {"input": batch})
	        return ctc_decode(outputs[0])

	    return "Please upload an image."


	# Gradio UI: one image input, one text output, plus bundled example images.
	image_input = gr.Image(label="Upload Text Image")
	text_output = gr.Textbox(label="Recognized Text")
	sample_images = ["999_3.png", "999_8.png", "997_7.png"]

	interface = gr.Interface(
	    fn=inference,
	    inputs=image_input,
	    outputs=text_output,
	    title="OCR (IIIT5K)",
	    description="CRNN + CTC model optimized for CPU inference using ONNX.",
	    examples=sample_images,
	)

	# Start the web app only when executed as a script.
	if __name__ == "__main__":
	    interface.launch()