starrednt1

Sleeping

App Files Community

toandev committed on Jan 29, 2025

Commit

bcb8d00

1 Parent(s): 809f485

Add image examples and refactor app for improved OCR functionality

Browse files

Files changed (6) hide show

app.py +22 -15
1.png → examples/1.png +0 -0
2.jpg → examples/2.jpg +0 -0
3.jpg → examples/3.jpg +0 -0
4.png → examples/4.png +0 -0
5.png → examples/5.png +0 -0

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import onnxruntime as rt
 from torchvision import transforms as T
 from pathlib import Path
 from PIL import Image
-from huggingface_hub import login, hf_hub_download
 import os
 import gradio as gr
@@ -12,24 +12,27 @@ import gradio as gr
 from utils.tokenizer_base import Tokenizer
-login(os.getenv("HF_TOKEN"))
 cwd = Path(__file__).parent.resolve()
 model_file = os.path.join(cwd, hf_hub_download("toandev/OCR-for-Captcha", "model.onnx"))
 img_size = (32, 128)
 vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
 tokenizer = Tokenizer(vocab)
 def to_numpy(tensor):
     return (
         tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
     )
 def get_transform(img_size):
     transforms = []
     transforms.extend(
         [
@@ -42,6 +45,7 @@ def get_transform(img_size):
 def load_model(model_file):
     transform = get_transform(img_size)
     onnx_model = onnx.load(model_file)
@@ -51,10 +55,12 @@ def load_model(model_file):
     return transform, s
 transform, s = load_model(model_file=model_file)
-def infer(img: Image.Image):
     x = transform(img.convert("RGB")).unsqueeze(0)
     ort_inputs = {s.get_inputs()[0].name: to_numpy(x)}
@@ -65,19 +71,20 @@ def infer(img: Image.Image):
     return preds[0]
-demo = gr.Interface(
-    infer,
-    gr.components.Image(type="pil"),
-    gr.components.Textbox(),
     title="OCR for CAPTCHA",
     description="Solve captchas from images including letters and numbers, success rate is about 80-90%.",
     examples=[
-        "1.png",
-        "2.jpg",
-        "3.jpg",
-        "4.png",
-        "5.png",
     ],
 )
-demo.launch()

 from torchvision import transforms as T
 from pathlib import Path
 from PIL import Image
+from huggingface_hub import hf_hub_download
 import os
 import gradio as gr
 from utils.tokenizer_base import Tokenizer
+# Download the model from Hugging Face Hub
 cwd = Path(__file__).parent.resolve()
 model_file = os.path.join(cwd, hf_hub_download("toandev/OCR-for-Captcha", "model.onnx"))
+# Define the image size and vocabulary
 img_size = (32, 128)
 vocab = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
+# Initialize the tokenizer
 tokenizer = Tokenizer(vocab)
 def to_numpy(tensor):
+    """Convert tensor to numpy."""
     return (
         tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()
     )
 def get_transform(img_size):
+    """Preprocess the input image."""
     transforms = []
     transforms.extend(
         [
 def load_model(model_file):
+    """Load the model and return the transform function."""
     transform = get_transform(img_size)
     onnx_model = onnx.load(model_file)
     return transform, s
+# Load the model
 transform, s = load_model(model_file=model_file)
+def process(img: Image.Image):
+    """Predict the text from the input image."""
     x = transform(img.convert("RGB")).unsqueeze(0)
     ort_inputs = {s.get_inputs()[0].name: to_numpy(x)}
     return preds[0]
+iface = gr.Interface(
+    process,
+    gr.Image(type="pil", label="Input Image"),
+    gr.Textbox(label="Predicted Text"),
     title="OCR for CAPTCHA",
     description="Solve captchas from images including letters and numbers, success rate is about 80-90%.",
     examples=[
+        "examples/1.png",
+        "examples/2.jpg",
+        "examples/3.jpg",
+        "examples/4.png",
+        "examples/5.png",
     ],
 )
+if __name__ == "__main__":
+    iface.launch()

1.png → examples/1.png RENAMED Viewed

File without changes

2.jpg → examples/2.jpg RENAMED Viewed

File without changes

3.jpg → examples/3.jpg RENAMED Viewed

File without changes

4.png → examples/4.png RENAMED Viewed

File without changes

5.png → examples/5.png RENAMED Viewed

File without changes