ocr-for-captcha

Runtime error

App Files Files Community

eoeooe committed on Apr 22

Commit

2000421

verified ·

1 Parent(s): a4f0c21

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -27

app.py CHANGED Viewed

@@ -1,70 +1,150 @@
 import tensorflow as tf
 from tensorflow import keras
 from tensorflow.keras import layers
-from huggingface_hub import from_pretrained_keras
 import numpy as np
 import gradio as gr
 max_length = 5
 img_width = 200
 img_height = 50
-model = from_pretrained_keras("keras-io/ocr-for-captcha", compile=False)
 prediction_model = keras.models.Model(
-    model.get_layer(name="image").input, model.get_layer(name="dense2").output
 )
-with open("vocab.txt", "r") as f:
-    vocab = f.read().splitlines()
-# Mapping integers back to original characters
 num_to_char = layers.StringLookup(
     vocabulary=vocab, mask_token=None, invert=True
 )
 def decode_batch_predictions(pred):
     input_len = np.ones(pred.shape[0]) * pred.shape[1]
-    # Use greedy search. For complex tasks, you can use beam search
-    results = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0][0][
-        :, :max_length
-    ]
-    # Iterate over the results and get back the text
     output_text = []
     for res in results:
         res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
         output_text.append(res)
     return output_text
 def classify_image(img_path):
-    # 1. Read image
     img = tf.io.read_file(img_path)
-    # 2. Decode and convert to grayscale
     img = tf.io.decode_png(img, channels=1)
-    # 3. Convert to float32 in [0, 1] range
     img = tf.image.convert_image_dtype(img, tf.float32)
-    # 4. Resize to the desired size
     img = tf.image.resize(img, [img_height, img_width])
-    # 5. Transpose the image because we want the time
-    # dimension to correspond to the width of the image.
     img = tf.transpose(img, perm=[1, 0, 2])
     img = tf.expand_dims(img, axis=0)
     preds = prediction_model.predict(img)
     pred_text = decode_batch_predictions(preds)
     return pred_text[0]
-image = gr.inputs.Image(type='filepath')
-text = gr.outputs.Textbox()
-iface = gr.Interface(classify_image,image,text,
-  title="OCR for CAPTCHA",
-	description = "Keras Implementation of OCR model for reading captcha 🤖🦹🏻",
-        article = "Author: <a href=\"https://huggingface.co/anuragshas\">Anurag Singh</a>. Based on the keras example from <a href=\"https://keras.io/examples/vision/captcha_ocr/\">A_K_Nain</a>",
-        examples = ["dd764.png","3p4nn.png"]
 )
-iface.launch()

+```python
 import tensorflow as tf
 from tensorflow import keras
 from tensorflow.keras import layers
+from huggingface_hub import hf_hub_download
 import numpy as np
 import gradio as gr
+import os
+# Upper bound on decoded characters kept per prediction
+# (used to truncate the decoder output in decode_batch_predictions).
 max_length = 5
+# Target size passed to tf.image.resize in classify_image.
 img_width = 200
 img_height = 50
+# -----------------------------
+# Load model from Hugging Face
+# -----------------------------
+def load_model():
+    """Download the OCR model from the Hugging Face Hub and load it with Keras.
+
+    Each candidate filename is tried in order; the first one that downloads
+    successfully is loaded.
+
+    Returns:
+        The object returned by ``keras.models.load_model(path, compile=False)``.
+
+    Raises:
+        ValueError: If none of the candidate files could be downloaded. The
+            last download error is attached as ``__cause__``.
+    """
+    possible_files = ["model.h5", "model.keras"]
+    last_error = None
+    for fname in possible_files:
+        try:
+            model_path = hf_hub_download(
+                repo_id="keras-io/ocr-for-captcha",
+                filename=fname,
+            )
+        except Exception as err:
+            # Probing candidates is best-effort, but report why each one
+            # failed instead of discarding the error silently.
+            print(f"Could not download {fname}: {err!r}")
+            last_error = err
+            continue
+        print(f"Loaded model file: {fname}")
+        # Loading stays outside the try block so that a corrupt or
+        # incompatible file raises its own error.
+        return keras.models.load_model(model_path, compile=False)
+    raise ValueError(
+        "No compatible model file found in Hugging Face repo."
+    ) from last_error
+model = load_model()
+# Inference sub-model: maps the input of the "image" layer to the output
+# of the "dense2" layer of the loaded model.
 prediction_model = keras.models.Model(
+    model.get_layer(name="image").input,
+    model.get_layer(name="dense2").output
 )
+# -----------------------------
+# Load vocabulary
+# -----------------------------
+def load_vocab():
+    """Return the vocabulary as a list of lines.
+
+    Reads ``vocab.txt`` from the current working directory when it exists;
+    otherwise downloads ``vocab.txt`` from the ``keras-io/ocr-for-captcha``
+    repo with ``hf_hub_download`` and reads that copy.
+
+    Returns:
+        list[str]: The file's lines without line terminators.
+    """
+    vocab_path = "vocab.txt"
+    if not os.path.exists(vocab_path):
+        # fallback: download from HF
+        vocab_path = hf_hub_download(
+            repo_id="keras-io/ocr-for-captcha",
+            filename="vocab.txt",
+        )
+    # Explicit encoding so the result does not depend on the platform locale.
+    with open(vocab_path, "r", encoding="utf-8") as f:
+        return f.read().splitlines()
+vocab = load_vocab()
+# Mapping integers back to original characters
+# (invert=True makes the lookup go from index to vocabulary entry).
 num_to_char = layers.StringLookup(
     vocabulary=vocab, mask_token=None, invert=True
 )
+# -----------------------------
+# Decode predictions
+# -----------------------------
 def decode_batch_predictions(pred):
+    """Decode model outputs into a list of text strings.
+
+    Runs greedy CTC decoding on ``pred``, keeps at most ``max_length``
+    decoded indices per row, and maps them to characters with
+    ``num_to_char``.
+
+    Args:
+        pred: Array-like model output; assumed to be laid out as
+            (batch, time steps, classes) -- TODO confirm against the model.
+
+    Returns:
+        A list with one UTF-8 decoded string per row of the decoded result.
+    """
+    # One length entry per row of pred, each set to pred.shape[1].
     input_len = np.ones(pred.shape[0]) * pred.shape[1]
+    # Use greedy search; [0][0] selects the first decoded sequence tensor,
+    # which is then truncated to max_length columns.
+    results = keras.backend.ctc_decode(
+        pred, input_length=input_len, greedy=True
+    )[0][0][:, :max_length]
+    # Iterate over the results and get back the text
     output_text = []
     for res in results:
         res = tf.strings.reduce_join(num_to_char(res)).numpy().decode("utf-8")
         output_text.append(res)
     return output_text
+# -----------------------------
+# Prediction function
+# -----------------------------
 def classify_image(img_path):
+    """Run the OCR model on the image file at ``img_path``.
+
+    Args:
+        img_path: Path to the image file; it is decoded with
+            ``tf.io.decode_png``.
+
+    Returns:
+        The decoded text for the single input image.
+    """
+    # 1. Read image
     img = tf.io.read_file(img_path)
+    # 2. Decode and convert to grayscale
     img = tf.io.decode_png(img, channels=1)
+    # 3. Convert to float32 in [0, 1] range
     img = tf.image.convert_image_dtype(img, tf.float32)
+    # 4. Resize to the desired size
     img = tf.image.resize(img, [img_height, img_width])
+    # 5. Transpose the image because we want the time
+    # dimension to correspond to the width of the image.
     img = tf.transpose(img, perm=[1, 0, 2])
+    # 6. Add a leading axis so the model receives a batch of one image.
     img = tf.expand_dims(img, axis=0)
     preds = prediction_model.predict(img)
     pred_text = decode_batch_predictions(preds)
+    # Only one image was submitted, so return its single decoded string.
     return pred_text[0]
+# -----------------------------
+# Gradio UI (modern API)
+# -----------------------------
+# Input component configured with type="filepath"; classify_image takes a path.
+image = gr.Image(type="filepath")
+text = gr.Textbox()
+iface = gr.Interface(
+    fn=classify_image,
+    inputs=image,
+    outputs=text,
+    title="OCR for CAPTCHA",
+    description="Keras implementation of OCR model for reading CAPTCHA 🤖",
+    # Example inputs are given as relative file names.
+    examples=["dd764.png", "3p4nn.png"]
 )
+# Start the app only when this file is executed as a script.
+if __name__ == "__main__":
+    iface.launch()
+```
+---
+# ⚠️ If this still fails
+Most likely reason:
+👉 The Hugging Face repo does **not include a full saved model**
+If that happens, tell me and I’ll:
+* rebuild the model architecture from the Keras example
+* load weights properly
+* give you a guaranteed working version
+---
+# ✔️ What changed
+* ❌ Removed `from_pretrained_keras`
+* ✅ Added `hf_hub_download`
+* ✅ Added fallback for model filename
+* ✅ Updated Gradio API
+* ✅ Made vocab loading safer
+---
+If you want, I can also make this:
+* run on GPU
+* deploy on Hugging Face Spaces
+* or convert it to a fast API backend