Update app.py
Browse files
app.py
CHANGED
|
@@ -1,87 +1,84 @@
|
|
| 1 |
-
import gradio as gr
|
| 2 |
-
import
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
-
|
| 28 |
-
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
status(0, desc='\nStep
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
img_gen
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
demo_data
|
| 65 |
-
'../src/demo_data/
|
| 66 |
-
'../src/demo_data/
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
<li>
|
| 78 |
-
|
| 79 |
-
<li>
|
| 80 |
-
<li>
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
inputs=[gr.Image(type='filepath', label='Input image')],
|
| 85 |
-
outputs=gr.Image(label='Output image: overlay with recognized text', type='pil', format='jpeg'),
|
| 86 |
-
examples=demo_data)
|
| 87 |
iface.launch()
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from helpers import OCRD
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
def run_ocrd_pipeline(img_path, status=gr.Progress(), binarize_mode='detailed', min_pixel_sum=30, median_bounds=(None, None), font_size=30):
    """
    Run the OCRD pipeline on one image, from file loading to text overlay creation.

    The function chains the OCRD class methods in a fixed order: binarization,
    text line segmentation, text line extraction/filtering/deskewing, OCR on the
    extracted lines and finally rendering of the recognized text as an overlay
    image. Progress is reported through `status` as the fraction of completed
    steps.

    Parameters:
        img_path (str): Path to the image file; handed to the OCRD constructor.
        status (gr.Progress, optional): Gradio progress tracker. It is called with a
            completion fraction in [0, 1] and a description before each step.
        binarize_mode (str): Mode forwarded to `OCRD.binarize_image`. Can be
            'detailed', 'fast', or 'no'.
        min_pixel_sum (int or 'default', optional): Minimum sum of pixels to consider
            a text line segmentation for extraction. Defaults to 30. Pass the string
            'default' to omit the argument so that the OCRD method's own default applies.
        median_bounds (tuple or 'default', optional): Bounds to filter text line
            segmentations based on size relative to the median. Defaults to
            (None, None). Pass the string 'default' to omit the argument.
        font_size (int or 'default', optional): Font size used in the text overlay.
            Defaults to 30. Pass the string 'default' to omit the argument.

    Returns:
        The value returned by `OCRD.create_text_overlay_image`: an image with the
        recognized text drawn as an overlay (presumably a PIL image, since the
        Gradio output component is declared with type='pil' -- confirm in helpers).
    """
    # Forward an optional argument only when the caller did not ask for the
    # OCRD method's built-in default via the 'default' sentinel.
    efadt_kwargs = {}
    if min_pixel_sum != 'default':
        efadt_kwargs['min_pixel_sum'] = min_pixel_sum
    if median_bounds != 'default':
        efadt_kwargs['median_bounds'] = median_bounds

    ctoi_kwargs = {}
    if font_size != 'default':
        ctoi_kwargs['font_size'] = font_size

    # Number of pipeline stages; each finished stage advances the bar by 1/5.
    total_steps = 5

    ocrd = OCRD(img_path)

    status(0 / total_steps, desc='\nStep 1/5: Binarizing image...\n')
    binarized = ocrd.binarize_image(ocrd.image, binarize_mode)

    status(1 / total_steps, desc='\nStep 2/5: Segmenting textlines...\n')
    textline_segments = ocrd.segment_textlines(binarized)

    status(2 / total_steps, desc='\nStep 3/5: Extracting, filtering and de-skewing textlines...\n')
    # The segmentation was predicted on a rescaled image, so the text lines
    # have to be cut from an equally rescaled copy of the input.
    image_scaled = ocrd.scale_image(ocrd.image)
    # Only index 0 of the segmentation's last axis is used for extraction;
    # the second return value of the extraction call is not needed here.
    textline_images, _ = ocrd.extract_filter_and_deskew_textlines(
        image_scaled, textline_segments[..., 0], **efadt_kwargs
    )

    status(3 / total_steps, desc='\nStep 4/5: OCR on textlines...\n')
    textline_preds = ocrd.ocr_on_textlines(textline_images)

    status(4 / total_steps, desc='\nStep 5/5: Creating output overlay image...')
    # The overlay is created with the first two shape entries of the scaled image.
    overlay_shape = (image_scaled.shape[0], image_scaled.shape[1])
    img_gen = ocrd.create_text_overlay_image(textline_images, textline_preds, overlay_shape, **ctoi_kwargs)

    status(1, desc='\nJOB COMPLETED\n')

    return img_gen
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# Example inputs offered in the UI; every file follows the
# '../src/demo_data/<name>_image.jpg' naming scheme.
demo_data = [
    f'../src/demo_data/{stem}_image.jpg'
    for stem in ('act', 'newjersey1', 'newjersey2', 'notes', 'washington')
]


# Gradio front end: one image in (passed to the pipeline as a file path),
# one overlay image out.
iface = gr.Interface(
    fn=run_ocrd_pipeline,
    title="OCRD Pipeline",
    # Adjacent string literals are concatenated into a single HTML list.
    description=(
        "<ul><li>This interactive demo showcases an 'Optical Character Recognition Digitization' pipeline that processes "
        "images to recognize text.</li> "
        "<li>Steps include binarization, text line segmentation, extraction, filtering and deskewing as well as OCR. "
        "Results are displayed as a generated overlay image.</li> "
        "<li>Optimized for English; other languages (e.g. German) may require OCR model fine-tuning.</li> "
        "<li>Uses free CPU-based compute, which is rather slow. A pipeline run will take up to 10 minutes. "
        "For lengthy waits, pre-computed demo results are available for download: https://github.com/pluniak/ocrd/tree/main/src/demo_data.</li> "
        "<li>Note: The demo is just a first version! OCR performance and computation speed can be optimized.</li> "
        "<li>The demo is based on code from my GitHub repository: https://github.com/pluniak/ocrd/tree/main</li></ul>"
    ),
    inputs=[gr.Image(type='filepath', label='Input image')],
    outputs=gr.Image(label='Output image: overlay with recognized text', type='pil', format='jpeg'),
    examples=demo_data,
)

# Start the web app as soon as the module is executed.
iface.launch()
|