import gradio as gr
import tensorflow as tf
import numpy as np
import cv2

# -------------------
# Configuration
# -------------------
# Input size (in pixels) every image is resized to before inference.
IMG_HEIGHT = 32
IMG_WIDTH = 128

# Recognisable alphabet; a character's position is its class index.
CHARACTERS = (
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    " "
)

# One extra class is reserved for the CTC blank symbol.
NUM_CLASSES = 1 + len(CHARACTERS)


# -------------------
# Build CRNN Model
# -------------------
def build_model():
    """Build the CRNN used for handwritten line recognition.

    The network is two Conv2D + 2x2 MaxPooling2D stages, a collapse of the
    feature map into a left-to-right sequence, two bidirectional LSTM layers
    and a per-time-step softmax over ``NUM_CLASSES``.

    Returns:
        An uncompiled ``tf.keras.models.Model`` that maps a batch of
        ``(IMG_HEIGHT, IMG_WIDTH, 1)`` images to an output of shape
        ``(batch, IMG_WIDTH // 4, NUM_CLASSES)``.
    """
    inputs = tf.keras.layers.Input(shape=(IMG_HEIGHT, IMG_WIDTH, 1))

    x = tf.keras.layers.Conv2D(32, (3, 3), activation="relu", padding="same")(inputs)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)

    x = tf.keras.layers.Conv2D(64, (3, 3), activation="relu", padding="same")(x)
    x = tf.keras.layers.MaxPooling2D((2, 2))(x)

    # Two 2x2 poolings leave a (IMG_HEIGHT // 4, IMG_WIDTH // 4, 64) map.
    time_steps = IMG_WIDTH // 4
    features_per_step = (IMG_HEIGHT // 4) * 64

    # Bring the width axis to the front BEFORE flattening so that each time
    # step holds exactly one image column (all rows x all channels). Reshaping
    # the height-major map directly has the right element count but feeds the
    # LSTMs row fragments instead of a left-to-right sequence, which is what
    # CTC alignment relies on.
    # NOTE: weights trained on the earlier layout (without this Permute) still
    # load, because the layer has no parameters, but they must be retrained.
    x = tf.keras.layers.Permute((2, 1, 3))(x)
    x = tf.keras.layers.Reshape(target_shape=(time_steps, features_per_step))(x)

    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(128, return_sequences=True)
    )(x)

    x = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(128, return_sequences=True)
    )(x)

    # Per-time-step class distribution; the extra class is the CTC blank.
    outputs = tf.keras.layers.Dense(NUM_CLASSES, activation="softmax")(x)

    return tf.keras.models.Model(inputs, outputs)


# Build the network once at import time; predict() reuses this module-level
# instance for every request.
model = build_model()

# NOTE: until trained weights are loaded the network only has its initial
# weights, so its predictions are not meaningful.
# If you trained your model, uncomment:
# model.load_weights("model.h5")


# -------------------
# Preprocess Image
# -------------------
def preprocess_image(image):
    """Turn an uploaded image into the single-item batch the model expects.

    Args:
        image: numpy array of shape (H, W), (H, W, 1), (H, W, 3) or
            (H, W, 4). Colour input is interpreted as RGB / RGBA, the channel
            order Gradio delivers for ``type="numpy"`` (OpenCV's own default
            is BGR, which is why the conversion code is spelled out).

    Returns:
        A float32 array of shape ``(1, IMG_HEIGHT, IMG_WIDTH, 1)`` whose
        values are the grayscale pixels divided by 255 (assumes 8-bit input).

    Raises:
        cv2.error: if OpenCV cannot convert or resize the given array.
    """
    if image.ndim == 3:
        channels = image.shape[2]
        if channels == 1:
            # Already grayscale; just drop the trailing channel axis.
            image = image[:, :, 0]
        elif channels == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
        else:
            # Any unsupported channel count still fails inside OpenCV.
            image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    # A 2-D array is already grayscale and needs no colour conversion.

    # cv2.resize takes the target size as (width, height).
    image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
    image = image.astype(np.float32) / 255.0

    # Add the channel axis (last) and the batch axis (first).
    image = np.expand_dims(image, axis=-1)
    image = np.expand_dims(image, axis=0)
    return image


# -------------------
# Decode CTC Output
# -------------------
def decode_predictions(pred):
    """Greedy-CTC-decode the first sample of ``pred`` into text.

    Args:
        pred: model output indexed as (batch, time steps, classes).

    Returns:
        The characters of ``CHARACTERS`` selected by the decoded label
        indices of the first batch item; entries equal to -1 are skipped.
    """
    batch_size, time_steps = pred.shape[0], pred.shape[1]
    # Every sample uses the full sequence length.
    sequence_lengths = np.full(batch_size, time_steps, dtype=np.float64)

    decode_result = tf.keras.backend.ctc_decode(
        pred, input_length=sequence_lengths, greedy=True
    )
    best_paths = decode_result[0][0]
    first_sample_labels = best_paths.numpy()[0]

    return "".join(
        CHARACTERS[label] for label in first_sample_labels if label != -1
    )


# -------------------
# Prediction Function
# -------------------
def predict(image):
    """Recognise the text in ``image`` and return it as a string.

    Chains preprocessing, a forward pass through the module-level ``model``
    and CTC decoding.
    """
    batch = preprocess_image(image)
    model_output = model.predict(batch)
    return decode_predictions(model_output)


# -------------------
# Combined Handler
# -------------------
def handle_input(image, typed_text):
    """Resolve the two UI inputs into one output string.

    Manually typed text wins over an uploaded image and is returned exactly
    as entered. Otherwise the image, if any, is run through ``predict``.
    With neither input a hint message is returned.
    """
    # Whitespace-only text counts as "nothing typed".
    if typed_text is not None and typed_text.strip():
        return typed_text

    if image is None:
        return "Please upload an image or enter text."

    return predict(image)


# -------------------
# Gradio Interface
# -------------------
# Input widgets: an image upload (delivered to the handler as a numpy array)
# and a free-text box that takes priority when filled in.
image_input = gr.Image(type="numpy", label="Upload Handwritten Image")
text_input = gr.Textbox(label="Or Type Text Manually")

# Single text field showing either the typed text or the recognised text.
output_box = gr.Textbox(label="Output Text")

interface = gr.Interface(
    fn=handle_input,
    inputs=[image_input, text_input],
    outputs=output_box,
    title="✍️ Handwritten Line Text Recognition",
    description="Upload a handwritten line image OR type text manually",
)

# Start the web UI when this file is executed.
interface.launch()