"""Gradio demo: recognise a handwritten text line with a small CRNN + CTC decoder."""

import gradio as gr
import tensorflow as tf
import numpy as np
import cv2

# -------------------
# Configuration
# -------------------
IMG_HEIGHT = 32
IMG_WIDTH = 128
CHARACTERS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 "
NUM_CLASSES = len(CHARACTERS) + 1  # +1 for CTC blank


# -------------------
# Build CRNN Model
# -------------------
def build_model() -> tf.keras.Model:
    """Build the CRNN used for line recognition.

    The network is two Conv2D/MaxPooling2D stages, two bidirectional LSTM
    layers and a per-timestep softmax over ``NUM_CLASSES``.

    Returns:
        A Keras model mapping ``(batch, IMG_HEIGHT, IMG_WIDTH, 1)`` inputs to
        ``(batch, IMG_WIDTH // 4, NUM_CLASSES)`` class probabilities.
    """
    inputs = tf.keras.layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1))

    x = tf.keras.layers.Conv2D(32, (3, 3), activation="relu", padding="same")(inputs)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)
    x = tf.keras.layers.Conv2D(64, (3, 3), activation="relu", padding="same")(x)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)

    # The feature map is (height/4, width/4, 64). Move width to the front
    # before flattening so that each timestep is one image column; reshaping
    # directly would cut rows into chunks and scramble the sequence order.
    # NOTE: Permute has no weights, but weights trained with the earlier
    # (un-permuted) layout will not produce meaningful output here.
    x = tf.keras.layers.Permute((2, 1, 3))(x)
    new_shape = (IMG_WIDTH // 4, (IMG_HEIGHT // 4) * 64)
    x = tf.keras.layers.Reshape(target_shape=new_shape)(x)

    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(128, return_sequences=True)
    )(x)
    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(128, return_sequences=True)
    )(x)

    outputs = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")(x)
    return tf.keras.models.Model(inputs, outputs)


model = build_model()

# If you trained your model, uncomment:
# model.load_weights("model.h5")


# -------------------
# Preprocess Image
# -------------------
def preprocess_image(image: np.ndarray) -> np.ndarray:
    """Convert an uploaded image into a model-ready batch of one.

    Args:
        image: Array as delivered by ``gr.Image(type="numpy")``: RGB by
            default; grayscale (2-D), single-channel and RGBA arrays are
            also accepted.

    Returns:
        A float32 array of shape ``(1, IMG_HEIGHT, IMG_WIDTH, 1)`` with
        values scaled by 1/255.
    """
    if image.ndim == 3:
        channels = image.shape[2]
        if channels == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
        elif channels == 3:
            # Gradio hands over RGB, not OpenCV's native BGR ordering.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
        else:
            image = image[..., 0]
    # A 2-D array is already grayscale and needs no colour conversion.

    image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
    image = image.astype(np.float32) / 255.0
    image = np.expand_dims(image, axis=-1)  # channel axis
    image = np.expand_dims(image, axis=0)  # batch axis
    return image


# -------------------
# Decode CTC Output
# -------------------
def decode_predictions(pred: np.ndarray) -> str:
    """Greedy-decode CTC output and return the text of the first batch item.

    Args:
        pred: Model output indexed as ``(batch, timesteps, classes)``.

    Returns:
        The decoded string built from ``CHARACTERS``.
    """
    # Every sample uses the full sequence length.
    input_len = np.ones(pred.shape[0]) * pred.shape[1]
    results = tf.keras.backend.ctc_decode(
        pred, input_length=input_len, greedy=True
    )[0][0]

    # -1 entries are skipped; they do not map to a character.
    return "".join(CHARACTERS[idx] for idx in results.numpy()[0] if idx != -1)


# -------------------
# Prediction Function
# -------------------
def predict(image: np.ndarray) -> str:
    """Run the full pipeline (preprocess, model, CTC decode) on one image."""
    img = preprocess_image(image)
    # verbose=0 keeps Keras from printing a progress bar on every request.
    pred = model.predict(img, verbose=0)
    return decode_predictions(pred)


# -------------------
# Combined Handler
# -------------------
def handle_input(image, typed_text):
    """Gradio callback: typed text takes priority over the uploaded image.

    Args:
        image: Uploaded image as a numpy array, or ``None``.
        typed_text: Text from the textbox, possibly empty or ``None``.

    Returns:
        The typed text unchanged if it is non-blank, otherwise the model's
        prediction for the image, otherwise a prompt asking for input.
    """
    # If user typed text → return it directly
    if typed_text and typed_text.strip():
        return typed_text

    # If image uploaded → run prediction
    if image is not None:
        return predict(image)

    return "Please upload an image or enter text."


# -------------------
# Gradio Interface
# -------------------
interface = gr.Interface(
    fn=handle_input,
    inputs=[
        gr.Image(type="numpy", label="Upload Handwritten Image"),
        gr.Textbox(label="Or Type Text Manually"),
    ],
    outputs=gr.Textbox(label="Output Text"),
    title="✍️ Handwritten Line Text Recognition",
    description="Upload a handwritten line image OR type text manually",
)

interface.launch()