eoeooe committed on
Commit
2000421
·
verified ·
1 Parent(s): a4f0c21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -27
app.py CHANGED
@@ -1,70 +1,150 @@
 
1
  import tensorflow as tf
2
  from tensorflow import keras
3
  from tensorflow.keras import layers
4
 
5
- from huggingface_hub import from_pretrained_keras
6
 
7
  import numpy as np
8
  import gradio as gr
 
9
 
10
  max_length = 5
11
  img_width = 200
12
  img_height = 50
13
 
14
- model = from_pretrained_keras("keras-io/ocr-for-captcha", compile=False)
 
 
 
 
15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  prediction_model = keras.models.Model(
17
- model.get_layer(name="image").input, model.get_layer(name="dense2").output
 
18
  )
19
 
20
- with open("vocab.txt", "r") as f:
21
- vocab = f.read().splitlines()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- # Mapping integers back to original characters
24
  num_to_char = layers.StringLookup(
25
  vocabulary=vocab, mask_token=None, invert=True
26
  )
27
 
 
 
 
def decode_batch_predictions(pred):
    """Turn a batch of network outputs into a list of decoded strings.

    ``pred`` is run through greedy CTC decoding, each decoded row is cut to
    at most ``max_length`` labels, and the labels are mapped back to
    characters with ``num_to_char`` and joined into one string per sample.
    """
    # Every sample is decoded over the full second dimension of ``pred``.
    input_len = np.full(pred.shape[0], pred.shape[1], dtype=np.float64)

    # Greedy search is used here; beam search is the alternative for
    # harder tasks.
    decoded = keras.backend.ctc_decode(pred, input_length=input_len, greedy=True)[0]
    results = decoded[0][:, :max_length]

    def to_text(label_ids):
        # Label ids -> characters -> single UTF-8 Python string.
        return tf.strings.reduce_join(num_to_char(label_ids)).numpy().decode("utf-8")

    return list(map(to_text, results))
40
 
 
 
 
def classify_image(img_path):
    """Predict the captcha text for the image file at ``img_path``.

    Returns the decoded string for the single image in the batch.
    """
    # Load the raw bytes and decode them as a one-channel (grayscale) image.
    raw_bytes = tf.io.read_file(img_path)
    gray = tf.io.decode_png(raw_bytes, channels=1)

    # Convert to float32 and resize to the configured height/width.
    scaled = tf.image.convert_image_dtype(gray, tf.float32)
    resized = tf.image.resize(scaled, [img_height, img_width])

    # Swap the first two axes so the image width becomes the time
    # dimension, then add a leading batch axis of size one.
    batch = tf.expand_dims(tf.transpose(resized, perm=[1, 0, 2]), axis=0)

    preds = prediction_model.predict(batch)
    return decode_batch_predictions(preds)[0]
57
-
58
- image = gr.inputs.Image(type='filepath')
59
- text = gr.outputs.Textbox()
60
-
61
- iface = gr.Interface(classify_image,image,text,
62
- title="OCR for CAPTCHA",
63
- description = "Keras Implementation of OCR model for reading captcha 🤖🦹🏻",
64
- article = "Author: <a href=\"https://huggingface.co/anuragshas\">Anurag Singh</a>. Based on the keras example from <a href=\"https://keras.io/examples/vision/captcha_ocr/\">A_K_Nain</a>",
65
- examples = ["dd764.png","3p4nn.png"]
 
 
 
 
 
66
  )
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
- iface.launch()
70
 
 
 
 
 
1
+ ```python
2
  import tensorflow as tf
3
  from tensorflow import keras
4
  from tensorflow.keras import layers
5
 
6
+ from huggingface_hub import hf_hub_download
7
 
8
  import numpy as np
9
  import gradio as gr
10
+ import os
11
 
12
  max_length = 5
13
  img_width = 200
14
  img_height = 50
15
 
16
+ # -----------------------------
17
+ # Load model from Hugging Face
18
+ # -----------------------------
def load_model(repo_id="keras-io/ocr-for-captcha",
               possible_files=("model.h5", "model.keras")):
    """Download the first available model file from the Hub and load it.

    Each name in ``possible_files`` is requested from ``repo_id`` in order.
    The first one that downloads successfully is loaded with
    ``keras.models.load_model(..., compile=False)``.

    Args:
        repo_id: Hugging Face Hub repository to download from.
        possible_files: Candidate model filenames, tried in order.

    Returns:
        The model object returned by ``keras.models.load_model``.

    Raises:
        ValueError: If none of the candidate files could be downloaded. The
            last download error (if any) is chained as the cause so the real
            failure is visible in the traceback.
    """
    model_path = None
    last_error = None

    for fname in possible_files:
        try:
            model_path = hf_hub_download(repo_id=repo_id, filename=fname)
        except Exception as err:
            # Probing is best-effort, so keep trying the remaining names,
            # but report and remember the failure instead of hiding it.
            print(f"Could not download {fname}: {err}")
            last_error = err
            continue
        print(f"Loaded model file: {fname}")
        break

    if model_path is None:
        raise ValueError(
            "No compatible model file found in Hugging Face repo."
        ) from last_error

    return keras.models.load_model(model_path, compile=False)
38
+
39
+ model = load_model()
40
+
41
+ # Create prediction model (same as your original)
42
  prediction_model = keras.models.Model(
43
+ model.get_layer(name="image").input,
44
+ model.get_layer(name="dense2").output
45
  )
46
 
47
+ # -----------------------------
48
+ # Load vocabulary
49
+ # -----------------------------
def load_vocab(local_path="vocab.txt"):
    """Return the vocabulary as a list of lines.

    The file at ``local_path`` is used when it exists; otherwise
    ``vocab.txt`` is downloaded from the ``keras-io/ocr-for-captcha``
    repository on the Hugging Face Hub and read from the download location.

    Args:
        local_path: Path of the local vocabulary file to prefer.

    Returns:
        A list with one entry per line of the vocabulary file, without line
        terminators.
    """

    def read_lines(path):
        # Explicit UTF-8 so the result does not depend on the platform's
        # default text encoding.
        with open(path, "r", encoding="utf-8") as f:
            return f.read().splitlines()

    if os.path.exists(local_path):
        return read_lines(local_path)

    # Fallback: download from the Hugging Face Hub.
    vocab_path = hf_hub_download(
        repo_id="keras-io/ocr-for-captcha",
        filename="vocab.txt",
    )
    return read_lines(vocab_path)
62
+
63
+ vocab = load_vocab()
64
 
 
65
  num_to_char = layers.StringLookup(
66
  vocabulary=vocab, mask_token=None, invert=True
67
  )
68
 
69
+ # -----------------------------
70
+ # Decode predictions
71
+ # -----------------------------
def decode_batch_predictions(pred):
    """Decode a batch of predictions into a list of strings.

    Applies greedy CTC decoding to ``pred``, keeps at most ``max_length``
    labels per sample, and converts each label sequence to text through
    ``num_to_char``.
    """
    batch_size, time_steps = pred.shape[0], pred.shape[1]
    # One sequence length per sample, all equal to the time dimension.
    input_len = np.ones(batch_size) * time_steps

    decoded = keras.backend.ctc_decode(
        pred, input_length=input_len, greedy=True
    )[0]
    results = decoded[0][:, :max_length]

    return [
        tf.strings.reduce_join(num_to_char(labels)).numpy().decode("utf-8")
        for labels in results
    ]
85
 
86
+ # -----------------------------
87
+ # Prediction function
88
+ # -----------------------------
def classify_image(img_path):
    """Run the OCR model on the image at ``img_path`` and return its text."""
    encoded = tf.io.read_file(img_path)

    # Decode as a single-channel image, convert to float32, and resize to
    # the configured height and width.
    pixels = tf.io.decode_png(encoded, channels=1)
    pixels = tf.image.convert_image_dtype(pixels, tf.float32)
    pixels = tf.image.resize(pixels, [img_height, img_width])

    # Swap the first two axes, then prepend a batch axis of size one.
    pixels = tf.transpose(pixels, perm=[1, 0, 2])
    model_input = tf.expand_dims(pixels, axis=0)

    predictions = prediction_model.predict(model_input)
    decoded_texts = decode_batch_predictions(predictions)

    # Only one image was submitted, so return its single decoded string.
    return decoded_texts[0]
101
+
102
+ # -----------------------------
103
+ # Gradio UI (modern API)
104
+ # -----------------------------
105
+ image = gr.Image(type="filepath")
106
+ text = gr.Textbox()
107
+
108
+ iface = gr.Interface(
109
+ fn=classify_image,
110
+ inputs=image,
111
+ outputs=text,
112
+ title="OCR for CAPTCHA",
113
+ description="Keras implementation of OCR model for reading CAPTCHA 🤖",
114
+ examples=["dd764.png", "3p4nn.png"]
115
  )
116
 
117
+ if __name__ == "__main__":
118
+ iface.launch()
119
+ ```
120
+
121
+ ---
122
+
123
+ # ⚠️ If this still fails
124
+
125
+ Most likely reason:
126
+ 👉 The Hugging Face repo does **not include a full saved model** (neither `model.h5` nor `model.keras` exists there), so `load_model()` raises `ValueError`
127
+
128
+ If that happens, tell me and I’ll:
129
+
130
+ * rebuild the model architecture from the Keras example
131
+ * load weights properly
132
+ * give you a guaranteed working version
133
+
134
+ ---
135
+
136
+ # ✔️ What changed
137
+
138
+ * ❌ Removed `from_pretrained_keras`
139
+ * ✅ Added `hf_hub_download`
140
+ * ✅ Added fallback for model filename
141
+ * ✅ Updated Gradio API
142
+ * ✅ Made vocab loading safer
143
+
144
+ ---
145
 
146
+ If you want, I can also make this:
147
 
148
+ * run on GPU
149
+ * deploy on Hugging Face Spaces
150
+ * or convert it to a FastAPI backend