Spaces:
Build error
Build error
Commit ·
7ee7e3a
1
Parent(s): 29496ec
first commit
Browse files- .gitignore +109 -0
- app.py +39 -0
- requirements.txt +11 -0
- src/app/__init__.py +0 -0
- src/app/crnn.py +90 -0
- src/app/decoder.py +148 -0
- src/app/main.py +28 -0
- src/app/text_recognition.py +161 -0
- src/utils/__init__.py +0 -0
- src/utils/utils.py +73 -0
.gitignore
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
env/
|
| 12 |
+
build/
|
| 13 |
+
develop-eggs/
|
| 14 |
+
dist/
|
| 15 |
+
downloads/
|
| 16 |
+
eggs/
|
| 17 |
+
.eggs/
|
| 18 |
+
lib/
|
| 19 |
+
lib64/
|
| 20 |
+
parts/
|
| 21 |
+
sdist/
|
| 22 |
+
var/
|
| 23 |
+
wheels/
|
| 24 |
+
*.egg-info/
|
| 25 |
+
.installed.cfg
|
| 26 |
+
*.egg
|
| 27 |
+
|
| 28 |
+
# PyInstaller
|
| 29 |
+
# Usually these files are written by a python script from a template
|
| 30 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 31 |
+
*.manifest
|
| 32 |
+
*.spec
|
| 33 |
+
|
| 34 |
+
# Installer logs
|
| 35 |
+
pip-log.txt
|
| 36 |
+
pip-delete-this-directory.txt
|
| 37 |
+
|
| 38 |
+
# Unit test / coverage reports
|
| 39 |
+
htmlcov/
|
| 40 |
+
.tox/
|
| 41 |
+
.coverage
|
| 42 |
+
.coverage.*
|
| 43 |
+
.cache
|
| 44 |
+
nosetests.xml
|
| 45 |
+
coverage.xml
|
| 46 |
+
*.cover
|
| 47 |
+
.hypothesis/
|
| 48 |
+
|
| 49 |
+
# Translations
|
| 50 |
+
*.mo
|
| 51 |
+
*.pot
|
| 52 |
+
|
| 53 |
+
# Django stuff:
|
| 54 |
+
*.log
|
| 55 |
+
local_settings.py
|
| 56 |
+
|
| 57 |
+
# Flask stuff:
|
| 58 |
+
instance/
|
| 59 |
+
.webassets-cache
|
| 60 |
+
|
| 61 |
+
# Scrapy stuff:
|
| 62 |
+
.scrapy
|
| 63 |
+
|
| 64 |
+
# Sphinx documentation
|
| 65 |
+
docs/_build/
|
| 66 |
+
|
| 67 |
+
# PyBuilder
|
| 68 |
+
target/
|
| 69 |
+
|
| 70 |
+
# Jupyter Notebook
|
| 71 |
+
.ipynb_checkpoints
|
| 72 |
+
|
| 73 |
+
# pyenv
|
| 74 |
+
.python-version
|
| 75 |
+
|
| 76 |
+
# celery beat schedule file
|
| 77 |
+
celerybeat-schedule
|
| 78 |
+
|
| 79 |
+
# SageMath parsed files
|
| 80 |
+
*.sage.py
|
| 81 |
+
|
| 82 |
+
# dotenv
|
| 83 |
+
.env
|
| 84 |
+
|
| 85 |
+
# virtualenv
|
| 86 |
+
.venv
|
| 87 |
+
venv/
|
| 88 |
+
ENV/
|
| 89 |
+
|
| 90 |
+
# Spyder project settings
|
| 91 |
+
.spyderproject
|
| 92 |
+
.spyproject
|
| 93 |
+
|
| 94 |
+
# Rope project settings
|
| 95 |
+
.ropeproject
|
| 96 |
+
|
| 97 |
+
# mkdocs documentation
|
| 98 |
+
/site
|
| 99 |
+
|
| 100 |
+
# mypy
|
| 101 |
+
.mypy_cache/
|
| 102 |
+
|
| 103 |
+
# macOS
|
| 104 |
+
*.DS_Store
|
| 105 |
+
|
| 106 |
+
# IDEs
|
| 107 |
+
.vscode/
|
| 108 |
+
.vs/
|
| 109 |
+
.idea/
|
app.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import os
import string
import gradio as gr

from src.app.text_recognition import TextRecognition

# Weights are cached under the user's home directory; they are fetched
# from the release URL below on first run.
root_path = os.path.expanduser('~/.Halotec/Models')

# Hyper-parameters must match the published checkpoint exactly.
model_config = {
    'filename'  : 'crnn_008000.pt',
    'classes'   : string.digits + string.ascii_uppercase + '. ',
    'url'       : 'https://github.com/Alimustoofaa/Research-OCR-License-Plate/releases/download/crnn/crnn_008000.pt',
    'file_size' : 31379595,
    'img_height': 32,
    'img_width' : 100,
    'map_to_seq_hidden': 64,
    'rnn_hidden': 256,
    'leaky_relu': False,
}
model_ocr = TextRecognition(root_path, model_config, jic=True)


def recognition(image):
    '''Run OCR on one uploaded image; return (text, confidence) for Gradio.'''
    result = model_ocr.recognition(image, decode='beam_search', beam_size=10)
    return result['text'], result['confidence']


title = "OCR License Plate Indonesia"
css = ".image-preview {height: auto !important;}"

iface = gr.Interface(
    title=title,
    fn=recognition,
    inputs=[gr.Image()],
    outputs=['text', 'text'],
    css=css,
)

iface.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
docopt==0.6.2
|
| 2 |
+
numpy==1.17.2
|
| 3 |
+
opencv-python==4.5.1.48
|
| 4 |
+
pillow==6.1.0
|
| 5 |
+
scipy==1.5.2
|
| 6 |
+
six==1.12.0
|
| 7 |
+
#torch==1.2.0
|
| 8 |
+
tqdm==4.49.0
|
| 9 |
+
torch
|
| 10 |
+
torchvision
|
| 11 |
+
torchaudio
|
src/app/__init__.py
ADDED
|
File without changes
|
src/app/crnn.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch.nn as nn
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class CRNN(nn.Module):
    """CRNN text recognizer: a VGG-style CNN feature extractor followed by
    two stacked bidirectional LSTMs and a per-timestep linear classifier.

    Input:  (batch, img_channel, img_height, img_width)
    Output: (seq_len, batch, num_class) un-normalized scores for CTC decoding.
    """

    # Per-stage convolution hyper-parameters, indexed by stage number.
    _OUT_CHANNELS = (64, 128, 256, 256, 512, 512, 512)
    _KERNELS = (3, 3, 3, 3, 3, 3, 2)
    _PADDINGS = (1, 1, 1, 1, 1, 1, 0)

    def __init__(self, img_channel, img_height, img_width, num_class,
                 map_to_seq_hidden=64, rnn_hidden=256, leaky_relu=False):
        super().__init__()

        self.cnn, (feat_channel, feat_height, _feat_width) = \
            self._cnn_backbone(img_channel, img_height, img_width, leaky_relu)

        # Each horizontal CNN column (channel x height) becomes one sequence step.
        self.map_to_seq = nn.Linear(feat_channel * feat_height, map_to_seq_hidden)

        self.rnn1 = nn.LSTM(map_to_seq_hidden, rnn_hidden, bidirectional=True)
        self.rnn2 = nn.LSTM(2 * rnn_hidden, rnn_hidden, bidirectional=True)

        self.dense = nn.Linear(2 * rnn_hidden, num_class)

    def _cnn_backbone(self, img_channel, img_height, img_width, leaky_relu):
        # Height is divided by 16 (and reduced by 1 by the last conv);
        # width is divided by 4 (and reduced by 1 by the last conv).
        assert img_height % 16 == 0
        assert img_width % 4 == 0

        widths = (img_channel,) + self._OUT_CHANNELS
        cnn = nn.Sequential()

        def add_stage(i, batch_norm=False):
            # conv -> (optional batchnorm) -> relu.  Module names must stay
            # 'conv{i}' / 'batchnorm{i}' / 'relu{i}' so existing checkpoints
            # keep loading via load_state_dict.
            cnn.add_module(
                f'conv{i}',
                nn.Conv2d(widths[i], widths[i + 1],
                          self._KERNELS[i], 1, self._PADDINGS[i])
            )
            if batch_norm:
                cnn.add_module(f'batchnorm{i}', nn.BatchNorm2d(widths[i + 1]))
            activation = (nn.LeakyReLU(0.2, inplace=True) if leaky_relu
                          else nn.ReLU(inplace=True))
            cnn.add_module(f'relu{i}', activation)

        add_stage(0)
        cnn.add_module('pooling0', nn.MaxPool2d(kernel_size=2, stride=2))
        # (64, img_height // 2, img_width // 2)

        add_stage(1)
        cnn.add_module('pooling1', nn.MaxPool2d(kernel_size=2, stride=2))
        # (128, img_height // 4, img_width // 4)

        add_stage(2)
        add_stage(3)
        # Pool only along the height axis so the sequence length is preserved.
        cnn.add_module('pooling2', nn.MaxPool2d(kernel_size=(2, 1)))
        # (256, img_height // 8, img_width // 4)

        add_stage(4, batch_norm=True)
        add_stage(5, batch_norm=True)
        cnn.add_module('pooling3', nn.MaxPool2d(kernel_size=(2, 1)))
        # (512, img_height // 16, img_width // 4)

        add_stage(6)  # (512, img_height // 16 - 1, img_width // 4 - 1)

        out_shape = (widths[-1], img_height // 16 - 1, img_width // 4 - 1)
        return cnn, out_shape

    def forward(self, images):
        """Map a batch of images to per-timestep class scores."""
        features = self.cnn(images)  # (batch, channel, height, width)
        batch, channel, height, width = features.size()

        # Fold (channel, height) together and make width the sequence axis.
        features = features.view(batch, channel * height, width)
        features = features.permute(2, 0, 1)  # (width, batch, feature)
        seq = self.map_to_seq(features)

        hidden, _ = self.rnn1(seq)
        hidden, _ = self.rnn2(hidden)

        return self.dense(hidden)  # (seq_len, batch, num_class)
|
src/app/decoder.py
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import defaultdict
|
| 2 |
+
|
| 3 |
+
import torch
|
| 4 |
+
import numpy as np
|
| 5 |
+
from scipy.special import logsumexp # log(p1 + p2) = logsumexp([log_p1, log_p2])
|
| 6 |
+
|
| 7 |
+
NINF = -1 * float('inf')
|
| 8 |
+
DEFAULT_EMISSION_THRESHOLD = 0.01
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _reconstruct(labels, blank=0):
|
| 12 |
+
new_labels = []
|
| 13 |
+
# merge same labels
|
| 14 |
+
previous = None
|
| 15 |
+
for l in labels:
|
| 16 |
+
if l != previous:
|
| 17 |
+
new_labels.append(l)
|
| 18 |
+
previous = l
|
| 19 |
+
# delete blank
|
| 20 |
+
new_labels = [l for l in new_labels if l != blank]
|
| 21 |
+
|
| 22 |
+
return new_labels
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def greedy_decode(emission_log_prob, blank=0, **kwargs):
    '''
    Best-path decoding: take the most likely class at each time step,
    then collapse the path with the CTC rule.
    '''
    best_path = np.argmax(emission_log_prob, axis=-1)
    return _reconstruct(best_path, blank=blank)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def beam_search_decode(emission_log_prob, blank=0, **kwargs):
    '''
    Plain beam search over per-step emissions.
    @params:
        - emission_log_prob: np.ndarray (length, class) of log-probabilities
        - blank: index of the CTC blank label
        - beam_size: int (required keyword)
        - emission_threshold: optional float; classes whose log-prob falls
          below it are pruned (default log(0.01))
    @return: list of int labels (best collapsed sequence)
    '''
    beam_size = kwargs['beam_size']
    emission_threshold = kwargs.get('emission_threshold', np.log(DEFAULT_EMISSION_THRESHOLD))

    length, class_count = emission_log_prob.shape

    beams = [([], 0)]  # (prefix, accumulated_log_prob)
    for t in range(length):
        new_beams = []
        for prefix, accumulated_log_prob in beams:
            for c in range(class_count):
                log_prob = emission_log_prob[t, c]
                if log_prob < emission_threshold:
                    continue
                # log(p1 * p2) = log_p1 + log_p2
                new_beams.append((prefix + [c], accumulated_log_prob + log_prob))

        # Bug fix: when every class at step t falls below the threshold the
        # original emptied the beam set, which later crashed with an
        # IndexError on labels_beams[0]. Carry the previous beams forward
        # instead of dropping everything.
        if not new_beams:
            continue

        # keep the beam_size most probable prefixes
        new_beams.sort(key=lambda x: x[1], reverse=True)
        beams = new_beams[:beam_size]

    # Merge beams whose paths collapse to the same label sequence.
    total_accu_log_prob = {}
    for prefix, accu_log_prob in beams:
        labels = tuple(_reconstruct(prefix, blank))
        # log(p1 + p2) = logsumexp([log_p1, log_p2])
        total_accu_log_prob[labels] = \
            logsumexp([accu_log_prob, total_accu_log_prob.get(labels, NINF)])

    best_labels, _ = max(total_accu_log_prob.items(), key=lambda x: x[1])
    return list(best_labels)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def prefix_beam_decode(emission_log_prob, blank=0, **kwargs):
    '''
    CTC prefix beam search.

    Unlike plain beam search over paths, this searches over *collapsed*
    prefixes, tracking for each prefix two probabilities: ending in a
    blank (lp_b) and ending in a non-blank (lp_nb). All values are kept
    in log space.

    @params:
        - emission_log_prob: np.ndarray (length, class) of log-probabilities
        - blank: index of the CTC blank label
        - beam_size: int (required keyword)
        - emission_threshold: optional float; classes below it are pruned
    @return: list of int labels (best prefix)
    '''
    beam_size = kwargs['beam_size']
    emission_threshold = kwargs.get('emission_threshold', np.log(DEFAULT_EMISSION_THRESHOLD))

    length, class_count = emission_log_prob.shape

    beams = [(tuple(), (0, NINF))]  # (prefix, (blank_log_prob, non_blank_log_prob))
    # initial of beams: (empty_str, (log(1.0), log(0.0)))

    for t in range(length):
        new_beams_dict = defaultdict(lambda: (NINF, NINF))  # log(0.0) = NINF

        for prefix, (lp_b, lp_nb) in beams:
            for c in range(class_count):
                log_prob = emission_log_prob[t, c]
                if log_prob < emission_threshold:
                    continue

                # last emitted (non-blank) label of this prefix, if any
                end_t = prefix[-1] if prefix else None

                # if new_prefix == prefix
                new_lp_b, new_lp_nb = new_beams_dict[prefix]

                if c == blank:
                    # blank keeps the prefix unchanged and ends it in a blank
                    new_beams_dict[prefix] = (
                        logsumexp([new_lp_b, lp_b + log_prob, lp_nb + log_prob]),
                        new_lp_nb
                    )
                    continue
                if c == end_t:
                    # repeating the last label without an intervening blank
                    # collapses into the same prefix (non-blank ending)
                    new_beams_dict[prefix] = (
                        new_lp_b,
                        logsumexp([new_lp_nb, lp_nb + log_prob])
                    )

                # if new_prefix == prefix + (c,)
                new_prefix = prefix + (c,)
                new_lp_b, new_lp_nb = new_beams_dict[new_prefix]

                if c != end_t:
                    new_beams_dict[new_prefix] = (
                        new_lp_b,
                        logsumexp([new_lp_nb, lp_b + log_prob, lp_nb + log_prob])
                    )
                else:
                    # same label as the prefix end: it only extends the prefix
                    # when the previous path ended in a blank
                    new_beams_dict[new_prefix] = (
                        new_lp_b,
                        logsumexp([new_lp_nb, lp_b + log_prob])
                    )

        # sorted by log(blank_prob + non_blank_prob)
        beams = sorted(new_beams_dict.items(), key=lambda x: logsumexp(x[1]), reverse=True)
        beams = beams[:beam_size]

    # NOTE(review): if every class at some step falls below the threshold,
    # new_beams_dict stays empty and beams[0] below raises IndexError —
    # same latent issue as beam_search_decode; confirm with real emissions.
    labels = list(beams[0][0])
    return labels
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def ctc_decode(log_probs, label2char=None, blank=0, method='beam_search', beam_size=10):
    '''
    Decode a batch of CTC emission log-probabilities into label sequences.

    @params:
        - log_probs: torch.Tensor of shape (seq_len, batch, class)
        - label2char: optional mapping from int label to character
        - blank: index of the CTC blank label
        - method: 'greedy' | 'beam_search' | 'prefix_beam_search'
        - beam_size: beam width for the beam-search decoders
    @return:
        - list (one entry per batch item) of label lists, or of character
          lists when label2char is given
    '''
    # Bug fix: the original tried .cpu().numpy() and fell back to
    # .detach().numpy() on RuntimeError — but a CUDA tensor raises
    # TypeError from .numpy(), so the fallback never fired on GPU.
    # detach() + cpu() handles autograd tensors and CUDA tensors alike.
    emission_log_probs = np.transpose(log_probs.detach().cpu().numpy(), (1, 0, 2))
    # size of emission_log_probs: (batch, length, class)

    decoders = {
        'greedy': greedy_decode,
        'beam_search': beam_search_decode,
        'prefix_beam_search': prefix_beam_decode,
    }
    decoder = decoders[method]

    decoded_list = []
    for emission_log_prob in emission_log_probs:
        decoded = decoder(emission_log_prob, blank=blank, beam_size=beam_size)
        if label2char:
            decoded = [label2char[l] for l in decoded]
        decoded_list.append(decoded)
    return decoded_list
|
src/app/main.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from text_recognition import TextRecognition

if __name__ == '__main__':
    import os
    import cv2
    import time
    import string

    # local directory where the checkpoint is expected to live
    root_path = os.path.expanduser('~/.Halotec/Models')

    # url=None: the weight file must already be on disk
    model_config = {
        'filename'  : 'crnn_008000.pt',
        'classes'   : string.digits + string.ascii_uppercase + '. ',
        'url'       : None,
        'file_size' : 592694,
        'img_height': 32,
        'img_width' : 100,
        'map_to_seq_hidden': 64,
        'rnn_hidden': 256,
        'leaky_relu': False,
    }

    text_recognition = TextRecognition(root_path, model_config, jic=True)
    image = cv2.imread('./images/12022041113414598_14.jpg')

    # time ten consecutive recognitions as a rough throughput check
    start = time.time()
    for _ in range(10):
        result = text_recognition.recognition(image, decode='beam_search', beam_size=10)
        print(result)
    print(time.time() - start)
|
src/app/text_recognition.py
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
@Author : Ali Mustofa HALOTEC
|
| 3 |
+
@Module : Character Recognition Neural Network
|
| 4 |
+
@Created on : 2 Agust 2022
|
| 5 |
+
"""
|
| 6 |
+
#!/usr/bin/env python3
|
| 7 |
+
# Path: src/apps/char_recognition.py
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import cv2
|
| 11 |
+
import sys
|
| 12 |
+
import numpy as np
|
| 13 |
+
|
| 14 |
+
import torch
|
| 15 |
+
import torch.nn as nn
|
| 16 |
+
import torch.nn.functional as F
|
| 17 |
+
from .crnn import CRNN
|
| 18 |
+
from .decoder import ctc_decode
|
| 19 |
+
|
| 20 |
+
try:
|
| 21 |
+
from src.utils.utils import download_and_unzip_model
|
| 22 |
+
except ImportError:
|
| 23 |
+
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 24 |
+
sys.path.append(os.path.dirname(SCRIPT_DIR))
|
| 25 |
+
from utils.utils import download_and_unzip_model
|
| 26 |
+
|
| 27 |
+
class TextRecognition:
    '''
    CRNN-based text recognizer.

    Ensures the weight file exists (downloading it if needed), loads the
    CRNN onto the best available device, optionally JIT-traces it, and
    exposes `recognition` to read text from a BGR image.
    '''

    def __init__(self, root_path:str, model_config:dict, jic: bool=True) -> None:
        '''
        @params:
            - root_path: directory holding (or receiving) the weight file
            - model_config: dict with keys 'filename', 'classes', 'url',
              'file_size', 'img_height', 'img_width', 'map_to_seq_hidden',
              'rnn_hidden', 'leaky_relu'
            - jic: if True, JIT-trace the model after loading
        '''
        self.jic = jic
        self.root_path = root_path
        self.model_config = model_config
        self.model_name = f'{root_path}/{model_config["filename"]}'
        # label 0 is reserved for the CTC blank, so real classes start at 1
        self.classes = {i+1: v for i, v in enumerate(model_config['classes'])}
        self.device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
        self.model = self.__load_model()
        if jic:
            # Bug fix: trace with an example on the model's device and with
            # the configured input size; the original always used a CPU
            # 1x1x32x100 tensor, which fails for CUDA models.
            self.model = self.__jic_trace(
                self.model, self.device,
                model_config['img_height'], model_config['img_width'])

    @staticmethod
    def __crnn_model(config) -> nn.Module:
        '''Build a CRNN whose hyper-parameters match the checkpoint.'''
        model = CRNN(
            img_channel = 1,
            img_height = config['img_height'],
            img_width = config['img_width'],
            num_class = len(config['classes']) + 1,  # +1 for the CTC blank
            map_to_seq_hidden = config['map_to_seq_hidden'],
            rnn_hidden = config['rnn_hidden'],
            leaky_relu = config['leaky_relu']
        )
        return model

    @staticmethod
    def __jic_trace(model:nn.Module, device=None, height: int=32, width: int=100) -> torch.jit.TracedModule:
        '''
        JIT-trace the model with a dummy grayscale input.
        @params:
            - model: nn.Module already moved to `device`
            - device: device to create the example input on (None = CPU)
            - height, width: example input size; must match the real inputs
        '''
        example = torch.rand(1, 1, height, width, device=device)
        return torch.jit.trace(model, example)

    @staticmethod
    def __check_model(root_path:str, model_config:dict) -> None:
        '''Download the weight file when it is not on disk yet.'''
        if not os.path.isfile(f'{root_path}/{model_config["filename"]}'):
            download_and_unzip_model(
                root_dir = root_path,
                name = model_config['filename'],
                url = model_config['url'],
                file_size = model_config['file_size'],
                unzip = False
            )
        else:
            print('Load model ...')

    def __load_model(self) -> nn.Module:
        '''
        Load the checkpoint from disk onto self.device.
        @return:
            - model: nn.Module in eval mode
        '''
        self.__check_model(self.root_path, self.model_config)
        model = self.__crnn_model(self.model_config)
        model.load_state_dict(torch.load(self.model_name, map_location=self.device))
        model.to(self.device)
        return model.eval()

    @staticmethod
    def __image_transform(image:np.ndarray, height: int=32, width: int=100) -> torch.Tensor:
        '''
        Convert a BGR image to the normalized grayscale tensor the CRNN expects.
        @params:
            - image: np.ndarray (BGR, as returned by cv2.imread)
        @return:
            - torch.Tensor of shape (1, 1, height, width), values in [-1, 1]
        '''
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        image = cv2.resize(image, (width, height))
        image = image.reshape(1, height, width)
        image = (image / 127.5) - 1.0  # map [0, 255] -> [-1, 1]
        image = torch.FloatTensor(image)
        return image.unsqueeze(0)

    def recognition(
        self,
        image: np.ndarray,
        decode: str = 'beam_search',
        beam_size: int = 10
    ) -> dict:
        '''
        Recognize text in an image.
        @params:
            - image: np.ndarray (BGR)
            - decode: str -> ['beam_search', 'greedy', 'prefix_beam_search']
            - beam_size: int -> beam size for beam search
        @return:
            - result: dict -> {'text': str, 'confidence': float}
        '''
        assert decode in ['beam_search', 'greedy', 'prefix_beam_search'], 'Decode Failed'

        # Use the checkpoint's configured input size rather than the
        # transform's hard-coded defaults, and move the tensor to the
        # model's device (the original left it on the CPU, which crashes
        # when the model runs on CUDA).
        image_t = self.__image_transform(
            image,
            height=self.model_config['img_height'],
            width=self.model_config['img_width']
        ).to(self.device)

        # recognize
        with torch.no_grad():
            output = self.model(image_t)
            log_probs = F.log_softmax(output, dim=2)
        # decode
        preds = ctc_decode(
            log_probs, method=decode, beam_size=beam_size,
            blank=0, label2char=self.classes)

        # Confidence: mean over time steps of the per-step max probability.
        # detach() + cpu() works for both CPU and CUDA tensors; the original
        # 'except RuntimeError' never caught the TypeError CUDA raises.
        exps = torch.exp(log_probs)
        probs = float((torch.max(exps, dim=2)[0] / len(exps)).sum(dim=0)[0].detach().cpu())

        return {'text': ''.join(preds[0]), 'confidence': round(probs, 2)}
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
if __name__ == '__main__':
    import time
    import string

    # local directory where the checkpoint is expected to live
    root_path = os.path.expanduser('~/.Halotec/Models')

    # url=None: the weight file must already be on disk
    model_config = {
        'filename'  : 'crnn_008000.pt',
        'classes'   : string.digits + string.ascii_uppercase + '. ',
        'url'       : None,
        'file_size' : 592694,
        'img_height': 32,
        'img_width' : 100,
        'map_to_seq_hidden': 64,
        'rnn_hidden': 256,
        'leaky_relu': False,
    }

    text_recognition = TextRecognition(root_path, model_config, jic=True)
    image = cv2.imread('./images/12022041114405685_0.jpg')

    # time ten consecutive recognitions as a rough throughput check
    start = time.time()
    for _ in range(10):
        result = text_recognition.recognition(image, decode='beam_search', beam_size=10)
        print(result)
    print(time.time() - start)
|
src/utils/__init__.py
ADDED
|
File without changes
|
src/utils/utils.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import cv2
|
| 3 |
+
import base64
|
| 4 |
+
import requests
|
| 5 |
+
import numpy as np
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from zipfile import ZipFile
|
| 9 |
+
|
| 10 |
+
def download_and_unzip_model(root_dir:str, name:str,
                             url:str, file_size:int, unzip:bool = False):
    '''
    Download a model file into root_dir, showing a byte-accurate progress
    bar, and optionally unzip it (removing the archive afterwards).
    @params:
        root_dir(str): The root directory of model.
        name(str): The name of model.
        url(str): The url of model.
        file_size(int): The expected size of the download in bytes.
        unzip(bool): Unzip the model or not.
    '''
    Path(root_dir).mkdir(parents=True, exist_ok=True)

    print(f'Downloading {root_dir.split("/")[-1]} model, please wait.')
    response = requests.get(url, stream=True)
    # Robustness fix: fail loudly on HTTP errors instead of writing an
    # error page into the weight file.
    response.raise_for_status()

    save_dir = f'{root_dir}/{name}'
    # Bug fix: the original wrapped iter_content in tqdm AND called
    # progress.update(len(data)), so the bar advanced once per chunk
    # (in the wrong unit) plus once per byte count — double counting.
    # Drive the bar manually with byte counts only.
    with tqdm(desc='Downloading model', total=file_size, unit='B',
              unit_scale=True, unit_divisor=1024) as progress:
        with open(save_dir, 'wb') as f:
            for data in response.iter_content(1024):
                f.write(data)
                progress.update(len(data))
    print(f'Done downloading {root_dir.split("/")[-1]} model.')

    # unzip model and drop the archive
    if unzip:
        with ZipFile(save_dir, 'r') as zip_obj:
            zip_obj.extractall(root_dir)
            print(f'Done unzip {root_dir.split("/")[-1]} model.')
        os.remove(save_dir)
|
| 45 |
+
|
| 46 |
+
def encode_image2string(image):
    '''Encode an image array as a base64 string of its JPEG bytes.'''
    jpeg_buffer = cv2.imencode('.jpg', image)[1]
    return base64.b64encode(jpeg_buffer.tobytes())
|
| 51 |
+
|
| 52 |
+
def decode_string2image(image_encoded):
    '''Decode a base64-encoded JPEG string back into a color image array.'''
    jpeg_bytes = base64.b64decode(image_encoded)
    pixel_buffer = np.frombuffer(jpeg_bytes, dtype=np.uint8)
    return cv2.imdecode(pixel_buffer, flags=1)
|
| 57 |
+
|
| 58 |
+
def resize_image(image, size_percent):
    '''
    Scale an image to a percentage of its original size,
    preserving the aspect ratio.
    Args:
        image(np.ndarray): The input image (as returned by cv2.imread).
        size_percent(int): Target size as a percentage of the original
            (e.g. 50 halves both dimensions, 200 doubles them).
    Returns:
        np.ndarray: The resized image.
    '''
    width = int(image.shape[1] * size_percent / 100)
    height = int(image.shape[0] * size_percent / 100)
    dim = (width, height)

    # INTER_AREA gives good quality for downscaling
    resized = cv2.resize(image, dim, interpolation = cv2.INTER_AREA)
    return resized
|