| import gradio as gr |
| import tensorflow as tf |
| import numpy as np |
| import cv2 |
|
|
| |
| |
| |
# Input geometry expected by the network (pixels).
IMG_HEIGHT, IMG_WIDTH = 32, 128

# Alphabet the recogniser can emit; a character's position in this string is
# the class index used when decoding predictions.
CHARACTERS = (
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    " "
)

# One extra output class on top of the alphabet.
# NOTE(review): presumably the CTC blank symbol -- confirm against training code.
NUM_CLASSES = len(CHARACTERS) + 1
|
|
|
|
| |
| |
| |
def build_model():
    """Build the convolutional-recurrent network used for line recognition.

    Two Conv2D + MaxPooling2D stages (each halving height and width) feed a
    width-major sequence into two bidirectional LSTM layers, followed by a
    per-timestep softmax over ``NUM_CLASSES``.

    Returns:
        An uncompiled ``tf.keras`` model mapping inputs of shape
        ``(batch, IMG_HEIGHT, IMG_WIDTH, 1)`` to outputs of shape
        ``(batch, IMG_WIDTH // 4, NUM_CLASSES)``.

    NOTE(review): the feature map is now transposed to width-major before
    being flattened into a sequence. Weight shapes are unchanged, but a
    checkpoint trained with the previous (un-transposed) layout will not
    decode correctly and needs retraining.
    """
    inputs = tf.keras.layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1))

    # Convolutional feature extractor: 32 then 64 filters, each stage
    # followed by 2x2 pooling, so the map ends up (H/4, W/4, 64).
    x = inputs
    for filters in (32, 64):
        x = tf.keras.layers.Conv2D(
            filters, (3, 3), activation="relu", padding="same"
        )(x)
        x = tf.keras.layers.MaxPooling2D((2, 2))(x)

    # Swap height and width so the row-major Reshape below turns every image
    # column into one timestep. Reshaping (H/4, W/4, 64) directly would make
    # each "timestep" a fragment of a single row instead of a left-to-right
    # slice of the line.
    x = tf.keras.layers.Permute((2, 1, 3))(x)
    sequence_shape = (IMG_WIDTH // 4, (IMG_HEIGHT // 4) * 64)
    x = tf.keras.layers.Reshape(target_shape=sequence_shape)(x)

    # Two stacked bidirectional LSTMs over the width axis.
    for _ in range(2):
        x = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(128, return_sequences=True)
        )(x)

    # Per-timestep class distribution.
    outputs = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")(x)

    return tf.keras.models.Model(inputs, outputs)
|
|
|
|
# Module-level network instance shared by every prediction request.
# NOTE(review): no checkpoint is restored in the visible source; unless
# weights are loaded elsewhere, predictions come from random initialisation.
model: tf.keras.Model = build_model()
|
|
| |
| |
|
|
|
|
| |
| |
| |
def preprocess_image(image):
    """Convert an uploaded image into a single-item model input batch.

    The image is reduced to grayscale, resized to ``IMG_WIDTH`` x
    ``IMG_HEIGHT``, scaled by 1/255 and given batch and channel axes.

    Args:
        image: NumPy image array as delivered by the Gradio image component.
            Colour input is treated as RGB (or RGBA when it has four
            channels); 2-D and single-channel arrays are taken as already
            grayscale.

    Returns:
        ``float32`` array of shape ``(1, IMG_HEIGHT, IMG_WIDTH, 1)``.
    """
    image = np.asarray(image)

    if image.ndim == 2:
        # Already grayscale; cvtColor would reject it.
        gray = image
    elif image.shape[-1] == 1:
        gray = image[..., 0]
    elif image.shape[-1] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    else:
        # Gradio hands over RGB, not OpenCV's BGR; using the BGR code here
        # would swap the red and blue luminance weights.
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # cv2.resize takes (width, height).
    gray = cv2.resize(gray, (IMG_WIDTH, IMG_HEIGHT))
    gray = gray.astype(np.float32) / 255.0

    # Add the leading batch axis and trailing channel axis.
    return gray[np.newaxis, ..., np.newaxis]
|
|
|
|
| |
| |
| |
def decode_predictions(pred):
    """Greedy-CTC-decode the first sequence of ``pred`` into a string.

    Args:
        pred: Model output; its first two axes are read as batch size and
            number of timesteps.

    Returns:
        The text spelled by the decoded class indices of the first batch
        item, looked up in ``CHARACTERS``. Indices equal to -1 are skipped.
    """
    batch_size, time_steps = pred.shape[0], pred.shape[1]
    # Every sequence in the batch uses the full number of timesteps.
    seq_lengths = np.full(batch_size, time_steps, dtype=np.float64)

    decoded_paths, _ = tf.keras.backend.ctc_decode(
        pred, input_length=seq_lengths, greedy=True
    )
    first_sequence = decoded_paths[0].numpy()[0]

    return "".join(CHARACTERS[idx] for idx in first_sequence if idx != -1)
|
|
|
|
| |
| |
| |
def predict(image):
    """Recognise the text in ``image``.

    Chains ``preprocess_image``, the module-level ``model`` and
    ``decode_predictions``, and returns the decoded string.
    """
    batch = preprocess_image(image)
    probabilities = model.predict(batch)
    return decode_predictions(probabilities)
|
|
|
|
| |
| |
| |
def handle_input(image, typed_text):
    """Choose the output for the UI from the two optional inputs.

    Typed text wins when it contains any non-whitespace character and is
    returned unchanged. Otherwise the image, if provided, is passed to
    ``predict``. With neither input a prompt message is returned.
    """
    has_text = typed_text is not None and typed_text.strip() != ""
    if has_text:
        return typed_text

    if image is None:
        return "Please upload an image or enter text."

    return predict(image)
|
|
|
|
| |
| |
| |
# UI components, created in the order they appear in the interface.
image_input = gr.Image(type="numpy", label="Upload Handwritten Image")
text_input = gr.Textbox(label="Or Type Text Manually")
output_box = gr.Textbox(label="Output Text")

# Wire both inputs to handle_input and show its return value as text.
interface = gr.Interface(
    fn=handle_input,
    inputs=[image_input, text_input],
    outputs=output_box,
    title="✍️ Handwritten Line Text Recognition",
    description="Upload a handwritten line image OR type text manually",
)

# Start the Gradio app.
interface.launch()
|
|