Update app_pages/ocr_comparator.py (#3)
Opened by Loren
Files changed: app_pages/ocr_comparator.py (+25 -23)
app_pages/ocr_comparator.py
CHANGED
|
@@ -1,13 +1,14 @@
|
|
| 1 |
"""This Streamlit app allows you to compare, from a given image, the results of different solutions:
|
| 2 |
EasyOcr, PaddleOCR, MMOCR, Tesseract
|
| 3 |
"""
|
| 4 |
-
|
| 5 |
import mim
|
| 6 |
|
| 7 |
mim.install(['mmengine>=0.7.1,<1.1.0'])
|
| 8 |
mim.install(['mmcv>=2.0.0rc4,<2.1.0'])
|
| 9 |
mim.install(['mmdet>=3.0.rc5,<3.2.0'])
|
| 10 |
mim.install(['mmocr'])
|
|
|
|
| 11 |
|
| 12 |
import streamlit as st
|
| 13 |
import plotly.express as px
|
|
@@ -21,7 +22,7 @@ from PIL import Image, ImageColor
|
|
| 21 |
import PIL
|
| 22 |
import easyocr
|
| 23 |
from paddleocr import PaddleOCR
|
| 24 |
-
from mmocr.utils.ocr import MMOCR
|
| 25 |
import pytesseract
|
| 26 |
from pytesseract import Output
|
| 27 |
import os
|
|
@@ -80,9 +81,10 @@ def app():
|
|
| 80 |
plotly figure : confidence color scale figure
|
| 81 |
"""
|
| 82 |
# the readers considered
|
| 83 |
-
out_reader_type_list = ['EasyOCR', 'PPOCR', 'MMOCR', 'Tesseract']
|
| 84 |
-
out_reader_type_dict = {'EasyOCR': 0, 'PPOCR': 1, 'MMOCR': 2, 'Tesseract': 3}
|
| 85 |
-
|
|
|
|
| 86 |
# Columns for recognition details results
|
| 87 |
out_cols_size = [2] + [2,1]*(len(out_reader_type_list)-1) # Except Tesseract
|
| 88 |
|
|
@@ -123,7 +125,7 @@ def app():
|
|
| 123 |
'Tagalog': 'tl', 'Tamil': 'ta', 'Telugu': 'te', 'Turkish': 'tr', 'Ukranian': 'uk', \
|
| 124 |
'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy'}
|
| 125 |
|
| 126 |
-
out_dict_lang_mmocr = {'English & Chinese': 'en'}
|
| 127 |
|
| 128 |
out_dict_lang_tesseract = {'Afrikaans': 'afr','Albanian': 'sqi','Amharic': 'amh', \
|
| 129 |
'Arabic': 'ara', 'Armenian': 'hye','Assamese': 'asm','Azerbaijani - Cyrilic': 'aze_cyrl', \
|
|
@@ -156,7 +158,8 @@ def app():
|
|
| 156 |
'Uzbek - Cyrilic': 'uzb_cyrl','Uzbek': 'uzb','Vietnamese': 'vie','Welsh': 'cym', \
|
| 157 |
'Western Frisian': 'fry','Yiddish': 'yid','Yoruba': 'yor'}
|
| 158 |
|
| 159 |
-
out_list_dict_lang = [out_dict_lang_easyocr, out_dict_lang_ppocr,
|
|
|
|
| 160 |
out_dict_lang_tesseract]
|
| 161 |
|
| 162 |
# Initialization of detection form
|
|
@@ -221,19 +224,18 @@ def app():
|
|
| 221 |
return out_ocr
|
| 222 |
|
| 223 |
###
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
###
|
| 238 |
def init_readers(in_list_params):
|
| 239 |
"""Initialization of the readers, and return them as list
|
|
@@ -255,10 +257,10 @@ def app():
|
|
| 255 |
reader_ppocr = init_ppocr(in_list_params[1])
|
| 256 |
|
| 257 |
# - MMOCR
|
| 258 |
-
with st.spinner("MMOCR reader initialization in progress ..."):
|
| 259 |
-
|
| 260 |
|
| 261 |
-
out_list_readers = [reader_easyocr, reader_ppocr, reader_mmocr]
|
| 262 |
|
| 263 |
return out_list_readers
|
| 264 |
|
|
|
|
| 1 |
"""This Streamlit app allows you to compare, from a given image, the results of different solutions:
|
| 2 |
EasyOcr, PaddleOCR, MMOCR, Tesseract
|
| 3 |
"""
|
| 4 |
+
"""
|
| 5 |
import mim
|
| 6 |
|
| 7 |
mim.install(['mmengine>=0.7.1,<1.1.0'])
|
| 8 |
mim.install(['mmcv>=2.0.0rc4,<2.1.0'])
|
| 9 |
mim.install(['mmdet>=3.0.rc5,<3.2.0'])
|
| 10 |
mim.install(['mmocr'])
|
| 11 |
+
"""
|
| 12 |
|
| 13 |
import streamlit as st
|
| 14 |
import plotly.express as px
|
|
|
|
| 22 |
import PIL
|
| 23 |
import easyocr
|
| 24 |
from paddleocr import PaddleOCR
|
| 25 |
+
#from mmocr.utils.ocr import MMOCR
|
| 26 |
import pytesseract
|
| 27 |
from pytesseract import Output
|
| 28 |
import os
|
|
|
|
| 81 |
plotly figure : confidence color scale figure
|
| 82 |
"""
|
| 83 |
# the readers considered
|
| 84 |
+
#out_reader_type_list = ['EasyOCR', 'PPOCR', 'MMOCR', 'Tesseract']
|
| 85 |
+
#out_reader_type_dict = {'EasyOCR': 0, 'PPOCR': 1, 'MMOCR': 2, 'Tesseract': 3}
|
| 86 |
+
out_reader_type_list = ['EasyOCR', 'PPOCR', 'Tesseract']
|
| 87 |
+
out_reader_type_dict = {'EasyOCR': 0, 'PPOCR': 1, 'Tesseract': 2}
|
| 88 |
# Columns for recognition details results
|
| 89 |
out_cols_size = [2] + [2,1]*(len(out_reader_type_list)-1) # Except Tesseract
|
| 90 |
|
|
|
|
| 125 |
'Tagalog': 'tl', 'Tamil': 'ta', 'Telugu': 'te', 'Turkish': 'tr', 'Ukranian': 'uk', \
|
| 126 |
'Urdu': 'ur', 'Uyghur': 'ug', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy'}
|
| 127 |
|
| 128 |
+
#out_dict_lang_mmocr = {'English & Chinese': 'en'}
|
| 129 |
|
| 130 |
out_dict_lang_tesseract = {'Afrikaans': 'afr','Albanian': 'sqi','Amharic': 'amh', \
|
| 131 |
'Arabic': 'ara', 'Armenian': 'hye','Assamese': 'asm','Azerbaijani - Cyrilic': 'aze_cyrl', \
|
|
|
|
| 158 |
'Uzbek - Cyrilic': 'uzb_cyrl','Uzbek': 'uzb','Vietnamese': 'vie','Welsh': 'cym', \
|
| 159 |
'Western Frisian': 'fry','Yiddish': 'yid','Yoruba': 'yor'}
|
| 160 |
|
| 161 |
+
out_list_dict_lang = [out_dict_lang_easyocr, out_dict_lang_ppocr, \
|
| 162 |
+
#out_dict_lang_mmocr, \
|
| 163 |
out_dict_lang_tesseract]
|
| 164 |
|
| 165 |
# Initialization of detection form
|
|
|
|
| 224 |
return out_ocr
|
| 225 |
|
| 226 |
###
|
| 227 |
+
#@st.experimental_memo(show_spinner=False)
|
| 228 |
+
#def init_mmocr(in_params):
|
| 229 |
+
# """Initialization of MMOCR reader
|
| 230 |
+
#
|
| 231 |
+
# Args:
|
| 232 |
+
# in_params (dict): dict with parameters
|
| 233 |
+
#
|
| 234 |
+
# Returns:
|
| 235 |
+
# mmocr reader: the MMOCR reader instance
|
| 236 |
+
# """
|
| 237 |
+
# out_ocr = MMOCR(recog=None, **in_params[1])
|
| 238 |
+
# return out_ocr
|
|
|
|
| 239 |
###
|
| 240 |
def init_readers(in_list_params):
|
| 241 |
"""Initialization of the readers, and return them as list
|
|
|
|
| 257 |
reader_ppocr = init_ppocr(in_list_params[1])
|
| 258 |
|
| 259 |
# - MMOCR
|
| 260 |
+
#with st.spinner("MMOCR reader initialization in progress ..."):
|
| 261 |
+
# reader_mmocr = init_mmocr(in_list_params[2])
|
| 262 |
|
| 263 |
+
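#out_list_readers = [reader_easyocr, reader_ppocr, reader_mmocr]
# NOTE(review): out_list_readers is still returned below; with this assignment commented out, the return will raise NameError unless the name is bound earlier in init_readers -- confirm.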
|
| 264 |
|
| 265 |
return out_list_readers
|
| 266 |
|