captcha-ocr3

Build error

App Files Files Community

murtazadahmardeh committed on Nov 17, 2023

Commit

133bc8b

1 Parent(s): 047c82f

test first

Browse files

Files changed (1) hide show

app.py +62 -55

app.py CHANGED Viewed

@@ -1,70 +1,77 @@
 import torch
-import onnx
-import onnxruntime as rt
 from torchvision import transforms as T
-from PIL import Image
-from tokenizer_base import Tokenizer
-import pathlib
-import os
 import gradio as gr
-from huggingface_hub import Repository
-repo = Repository(  # clone the "docparser/captcha" model repo into ./secret_models
-    local_dir="secret_models",
-    repo_type="model",
-    clone_from="docparser/captcha",
-    token=True  # presumably reuses the locally stored Hugging Face token - confirm against huggingface_hub docs
-)
-repo.git_pull()  # refresh the local clone before the model file is read
-cwd = pathlib.Path(__file__).parent.resolve()  # directory that contains this script
-model_file = os.path.join(cwd,"secret_models","captcha.onnx")  # ONNX model path inside the cloned repo
-img_size = (32,128)  # size handed to T.Resize in get_transform
-charset = r"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
-tokenizer_base = Tokenizer(charset)  # tokenizer built over the charset above; used by get_text to decode predictions
-def get_transform(img_size):  # build the image preprocessing pipeline for the given target size
-        transforms = []
-        transforms.extend([
-            T.Resize(img_size, T.InterpolationMode.BICUBIC),  # bicubic resize to img_size
             T.ToTensor(),
             T.Normalize(0.5, 0.5)  # mean=0.5, std=0.5
         ])
-        return T.Compose(transforms)  # single callable applying the steps in order
-def to_numpy(tensor):  # convert a torch tensor to a NumPy array on the CPU
-    return tensor.detach().cpu().numpy() if tensor.requires_grad else tensor.cpu().numpy()  # detach first when the tensor tracks gradients
-def initialize_model(model_file):  # load and validate the ONNX model; returns (transform, ort_session)
-    transform = get_transform(img_size)  # reads the module-level img_size
-    # Onnx model loading
-    onnx_model = onnx.load(model_file)
-    onnx.checker.check_model(onnx_model)  # run the ONNX checker on the loaded model before creating a session
-    ort_session = rt.InferenceSession(model_file)  # ONNX Runtime session used for inference
-    return transform,ort_session
-def get_text(img_org):  # Gradio callback: PIL image in, decoded text out
-    # img_org = Image.open(image_path)
-    # Preprocess. Model expects a batch of images with shape: (B, C, H, W)
-    x = transform(img_org.convert('RGB')).unsqueeze(0)  # unsqueeze(0) adds the batch dimension; uses the module-level transform
-    # compute ONNX Runtime output prediction
-    ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)}  # feed dict keyed by the model's first input name
-    logits = ort_session.run(None, ort_inputs)[0]  # first model output
-    probs = torch.tensor(logits).softmax(-1)  # softmax over the last axis
-    preds, probs = tokenizer_base.decode(probs)  # presumably returns (strings, per-token probabilities) - confirm in tokenizer_base
-    preds = preds[0]  # only one image in the batch
-    print(preds)  # echo the prediction to stdout
-    return preds
-transform,ort_session = initialize_model(model_file=model_file)  # module-level globals read by get_text
-gr.Interface(  # single-function demo: image in, text out
-    get_text,
-    inputs=gr.Image(type="pil"),  # hand the callback a PIL image
-    outputs=gr.outputs.Textbox(),  # NOTE(review): gr.outputs is the legacy Gradio namespace - confirm it exists in the pinned version
-    title="Text Captcha Reader",
-    examples=["8000.png","11JW29.png","2a8486.jpg","2nbcx.png",
-             "000679.png","000HU.png","00Uga.png.jpg","00bAQwhAZU.jpg",
-             "00h57kYf.jpg","0EoHdtVb.png","0JS21.png","0p98z.png","10010.png"]
-).launch()  # start the Gradio server at import time

 import torch
 from torchvision import transforms as T
 import gradio as gr
+class App:  # callable used as the Gradio click handler: loads a model by name and reads the text in an image
+    title = 'Scene Text Recognition with<br/>Permuted Autoregressive Sequence Models'  # shown as the page heading; '<br/>' is replaced by a space for the window title in main()
+    models = ['parseq', 'parseq_tiny', 'abinet', 'crnn', 'trba', 'vitstr']  # choices for the model radio button; the selected name is passed to torch.hub.load
+    def __init__(self):  # set up an empty model cache and the preprocessing pipeline
+        self._model_cache = {}  # model name -> loaded model
+        self._preprocess = T.Compose([
+            T.Resize((32, 128), T.InterpolationMode.BICUBIC),  # bicubic resize to a fixed 32x128
             T.ToTensor(),
             T.Normalize(0.5, 0.5)  # mean=0.5, std=0.5
         ])
+    def _get_model(self, name):  # return the model for `name`, loading it on first use
+        if name in self._model_cache:
+            return self._model_cache[name]
+        model = torch.hub.load('baudm/parseq', name, pretrained=True).eval()  # fetch from the baudm/parseq hub repo and switch to eval mode
+        self._model_cache[name] = model  # keep it so later calls skip the load
+        return model
+    @torch.inference_mode()
+    def __call__(self, model_name, image):  # returns (label, [raw tokens, confidences]); ('', []) when no image is given
+        if image is None:
+            return '', []
+        model = self._get_model(model_name)
+        image = self._preprocess(image.convert('RGB')).unsqueeze(0)  # unsqueeze(0) adds the batch dimension
+        # Greedy decoding
+        pred = model(image).softmax(-1)  # softmax over the last axis
+        label, _ = model.tokenizer.decode(pred)
+        raw_label, raw_confidence = model.tokenizer.decode(pred, raw=True)  # presumably raw=True keeps special tokens - confirm in the parseq tokenizer
+        # Format confidence values
+        max_len = 25 if model_name == 'crnn' else len(label[0]) + 1  # presumably +1 keeps the trailing [E] token - confirm
+        conf = list(map('{:0.1f}'.format, raw_confidence[0][:max_len].tolist()))  # one decimal place per confidence value
+        return label[0], [raw_label[0][:max_len], conf]  # two rows for the raw-output Dataframe: tokens, then confidences
+def main():  # build the Gradio Blocks UI around an App instance and launch it
+    app = App()
+    with gr.Blocks(analytics_enabled=False, title=app.title.replace('<br/>', ' ')) as demo:  # window title is app.title with the line break replaced by a space
+        gr.Markdown(f"""
+            <div align="center">
+            # {app.title}
+            [![GitHub](https://img.shields.io/badge/baudm-parseq-blue?logo=github)](https://github.com/baudm/parseq)
+            </div>
+            To use this interactive demo for PARSeq and reproduced models:
+            1. Select which model you want to use.
+            2. Upload your own cropped image (or select from the given examples), or sketch on the canvas.
+            3. Click **Read Text**.
+            *NOTE*: None of these models were trained on handwritten text datasets.
+        """)
+        model_name = gr.Radio(app.models, value=app.models[0], label='The STR model to use')  # first entry of app.models is the default
+        with gr.Tabs():  # two input modes, each with its own image widget and button
+            with gr.TabItem('Image Upload'):
+                image_upload = gr.Image(type='pil', source='upload', label='Image')  # NOTE(review): the `source=` keyword depends on the Gradio version - confirm against the pinned release
+                read_upload = gr.Button('Read Text')
+            with gr.TabItem('Canvas Sketch'):
+                image_canvas = gr.Image(type='pil', source='canvas', label='Sketch')  # NOTE(review): same `source=` version caveat as above
+                read_canvas = gr.Button('Read Text')
+        output = gr.Textbox(max_lines=1, label='Model output')  # receives the first value returned by app
+        #adv_output = gr.Checkbox(label='Show detailed output')
+        raw_output = gr.Dataframe(row_count=2, col_count=0, label='Raw output with confidence values ([0, 1] interval; [B] - BLANK token; [E] - EOS token)')  # receives the second value returned by app
+        read_upload.click(app, inputs=[model_name, image_upload], outputs=[output, raw_output])  # both buttons call the same App instance
+        read_canvas.click(app, inputs=[model_name, image_canvas], outputs=[output, raw_output])
+        #adv_output.change(lambda x: gr.update(visible=x), inputs=adv_output, outputs=raw_output)
+    demo.launch()  # start the Gradio server
+if __name__ == '__main__':  # run the demo only when executed as a script, not on import
+    main()