# TextRecognition / app.py
# charantejapolavarapu's picture
# Update app.py
# 4dc310f verified
import gradio as gr
import tensorflow as tf
import numpy as np
import cv2
# -------------------
# Configuration
# -------------------
# Input image size (in pixels) expected by the network.
IMG_HEIGHT, IMG_WIDTH = 32, 128
# Alphabet the recogniser can emit; a character's position in this string
# is its class index.
CHARACTERS = (
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    " "
)
# One extra class is reserved for the CTC blank symbol.
NUM_CLASSES = 1 + len(CHARACTERS)
# -------------------
# Build CRNN Model
# -------------------
def build_model():
    """Build the CRNN used for line-level text recognition.

    The network is two Conv2D + MaxPooling2D stages, a conversion of the
    feature map into a left-to-right sequence, two bidirectional LSTM
    layers and a per-time-step softmax over ``NUM_CLASSES`` classes.

    Returns:
        An uncompiled Keras model mapping inputs of shape
        ``(batch, IMG_HEIGHT, IMG_WIDTH, 1)`` to outputs of shape
        ``(batch, IMG_WIDTH // 4, NUM_CLASSES)``.

    Note:
        The feature map is transposed to width-major order before it is
        flattened into a sequence. Weight shapes are unaffected (``Permute``
        has no parameters), but weights trained against the previous
        layout, which reshaped without transposing, saw a different feature
        ordering and should be retrained.
    """
    layers = tf.keras.layers

    inputs = layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1))
    x = layers.Conv2D(32, (3, 3), activation="relu", padding="same")(inputs)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Conv2D(64, (3, 3), activation="relu", padding="same")(x)
    x = layers.MaxPooling2D((2, 2))(x)

    # The feature map is (height / 4, width / 4, 64). Move the width axis
    # first so that each time step fed to the LSTMs holds the features of
    # exactly one image column; reshaping without this transpose would mix
    # pixels from unrelated columns into the same time step.
    x = layers.Permute((2, 1, 3))(x)
    time_steps = IMG_WIDTH // 4
    features_per_step = (IMG_HEIGHT // 4) * 64
    x = layers.Reshape(target_shape=(time_steps, features_per_step))(x)

    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)

    outputs = layers.Dense(NUM_CLASSES, activation="softmax")(x)
    return tf.keras.models.Model(inputs, outputs)
# Instantiate the network once at import time; predict() reuses this object.
model = build_model()
# NOTE: no weights are loaded here, so the model runs with freshly
# initialised parameters. If you trained your model, uncomment:
# model.load_weights("model.h5")
# -------------------
# Preprocess Image
# -------------------
def preprocess_image(image):
    """Convert an uploaded image into a single-item batch for the model.

    Args:
        image: Array-like image of shape ``(H, W)``, ``(H, W, 1)``,
            ``(H, W, 3)`` or ``(H, W, 4)``. Colour input is interpreted as
            RGB / RGBA, the channel order Gradio's ``type="numpy"`` image
            component delivers (not OpenCV's BGR).

    Returns:
        A ``float32`` array of shape ``(1, IMG_HEIGHT, IMG_WIDTH, 1)`` with
        pixel values divided by 255 (i.e. in ``[0, 1]`` for 8-bit input).
    """
    image = np.asarray(image)

    # Collapse an explicit single channel so it is handled like a plain
    # grayscale image.
    if image.ndim == 3 and image.shape[-1] == 1:
        image = image[..., 0]

    # Only colour images need conversion; already-gray input is passed
    # through instead of crashing inside cvtColor.
    if image.ndim == 3:
        has_alpha = image.shape[-1] == 4
        conversion = cv2.COLOR_RGBA2GRAY if has_alpha else cv2.COLOR_RGB2GRAY
        image = cv2.cvtColor(image, conversion)

    # cv2.resize takes the target size as (width, height).
    image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
    image = image.astype(np.float32) / 255.0

    # Add the trailing channel axis and the leading batch axis.
    return image[np.newaxis, ..., np.newaxis]
# -------------------
# Decode CTC Output
# -------------------
def decode_predictions(pred):
    """Greedy-CTC-decode the first sample of a prediction batch into text.

    Args:
        pred: Model output; its first two axes are read as
            ``(batch, time_steps)``.

    Returns:
        The decoded string for the first item of the batch, with the
        ``-1`` padding entries produced by the decoder skipped.
    """
    batch_size, time_steps = pred.shape[0], pred.shape[1]
    # Every sample uses the full sequence length.
    sequence_lengths = np.ones(batch_size) * time_steps

    decoded = tf.keras.backend.ctc_decode(
        pred, input_length=sequence_lengths, greedy=True
    )
    first_sample = decoded[0][0].numpy()[0]

    return "".join(CHARACTERS[index] for index in first_sample if index != -1)
# -------------------
# Prediction Function
# -------------------
def predict(image):
    """Recognise the text in ``image`` and return it as a string.

    Chains preprocessing, the forward pass of the module-level ``model``,
    and CTC decoding.
    """
    batch = preprocess_image(image)
    probabilities = model.predict(batch)
    return decode_predictions(probabilities)
# -------------------
# Combined Handler
# -------------------
def handle_input(image, typed_text):
    """Resolve the output text from either manual input or an image.

    Typed text takes priority: if it contains anything other than
    whitespace it is returned unchanged. Otherwise an uploaded image is
    run through ``predict``. With neither, a prompt message is returned.
    """
    has_typed_text = typed_text is not None and bool(typed_text.strip())
    if has_typed_text:
        return typed_text

    if image is None:
        return "Please upload an image or enter text."

    return predict(image)
# -------------------
# Gradio Interface
# -------------------
# Input and output widgets, declared separately for readability.
image_input = gr.Image(type="numpy", label="Upload Handwritten Image")
text_input = gr.Textbox(label="Or Type Text Manually")
text_output = gr.Textbox(label="Output Text")

# Wire the widgets to the combined handler and start the web UI.
interface = gr.Interface(
    fn=handle_input,
    inputs=[image_input, text_input],
    outputs=text_output,
    title="✍️ Handwritten Line Text Recognition",
    description="Upload a handwritten line image OR type text manually",
)
interface.launch()