| import cv2 |
| import numpy as np |
| import gradio as gr |
| import onnxruntime as ort |
|
|
| |
# Lookup table from predicted class index to character. Index 0 holds a
# placeholder space: ctc_decode skips index 0, so it is never emitted.
alphabet = " " + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789"

# ONNX Runtime session for the recognition model, created once at import
# time and reused for every request.
session = ort.InferenceSession("crnn.onnx")
|
|
|
|
def preprocess(image, width=100, height=32):
    """Convert an input image into the tensor passed to the model.

    The image is reduced to one grayscale channel, resized to
    ``width`` x ``height`` pixels, divided by 255 and given two leading
    singleton axes.

    Args:
        image: NumPy image array, either 2-D (already grayscale) or 3-D
            with the channels in the last axis. Colour input is treated
            as RGB(A) -- the channel order Gradio's ``Image`` component
            delivers -- rather than OpenCV's native BGR.
            NOTE(review): the division by 255 assumes 8-bit pixel
            values; confirm if other dtypes can reach this function.
        width: Target width in pixels (default 100).
        height: Target height in pixels (default 32).

    Returns:
        ``np.float32`` array of shape ``(1, 1, height, width)``.
    """
    if image.ndim == 2:
        # Already single-channel; nothing to convert.
        gray = image
    elif image.ndim == 3 and image.shape[2] == 1:
        # Drop the trailing singleton channel axis.
        gray = image.reshape(image.shape[0], image.shape[1])
    elif image.ndim == 3 and image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    else:
        # Any unsupported layout is left for OpenCV to reject.
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # cv2.resize takes (width, height); skip it when the image already
    # has the target size.
    if gray.shape != (height, width):
        gray = cv2.resize(gray, (width, height))

    scaled = gray.astype(np.float32) / 255.0

    # Prepend two singleton axes: (height, width) -> (1, 1, height, width).
    return scaled[np.newaxis, np.newaxis, :, :]
|
|
|
|
def ctc_decode(preds, charset=None, blank=0):
    """Decode model output into text by greedy CTC collapsing.

    The highest-scoring class is taken at every step; a step is kept only
    if its class is not the blank and differs from the class of the step
    immediately before it. The kept classes are mapped to characters.

    Args:
        preds: 3-D score array. Classes are on axis 2 and axis 1 must have
            length 1 (it is squeezed away), leaving one class per step
            along axis 0.
        charset: Sequence mapping class index to character. Defaults to
            the module-level ``alphabet``.
        blank: Class index treated as the CTC blank (default 0).

    Returns:
        The decoded string; empty if every step is blank.
    """
    if charset is None:
        charset = alphabet

    tokens = np.argmax(preds, axis=2).squeeze(1)

    # Keep non-blank steps ...
    keep = tokens != blank
    # ... that do not repeat the previous step (the first step has no
    # predecessor, so it is only subject to the blank test).
    keep[1:] &= tokens[1:] != tokens[:-1]

    return "".join(charset[int(t)] for t in tokens[keep])
|
|
|
|
def inference(image):
    """Recognise the text in an uploaded image.

    Args:
        image: Image handed over by the UI, or ``None`` when nothing has
            been uploaded.

    Returns:
        The decoded text, or a prompt asking for an image when ``image``
        is ``None``.
    """
    if image is not None:
        tensor = preprocess(image)
        # Passing None as the first argument requests every model output;
        # only the first one is decoded.
        outputs = session.run(None, {"input": tensor})
        return ctc_decode(outputs[0])
    return "Please upload an image."
|
|
|
|
| |
# Gradio front end: one image input wired through `inference` to one text
# output, with a few bundled example images.
interface = gr.Interface(
    title="OCR (IIIT5K)",
    description="CRNN + CTC model optimized for CPU inference using ONNX.",
    fn=inference,
    inputs=gr.Image(label="Upload Text Image"),
    outputs=gr.Textbox(label="Recognized Text"),
    examples=["999_3.png", "999_8.png", "997_7.png"],
)


if __name__ == "__main__":
    # Launch the UI only when run as a script, not when imported.
    interface.launch()