File size: 3,307 Bytes
0c84ee8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# -*- coding: utf-8 -*-
import os
import numpy as np
#import tensorflow as tf
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
import cv2
import math

from .helpers import *
from .tfhelpers import Model

# Preloading trained model with activation function
# Loading is slow -> prevent multiple loads
# NOTE: this runs at import time as a deliberate module-level side effect,
# so the TF graphs are built exactly once per process.
print("Loading segmentation models...")
# Absolute directory of this module; model paths are resolved relative to it.
location = os.path.dirname(os.path.abspath(__file__))
# CNN gap classifier (see _classify with RNN=False).
CNN_model = Model(
    os.path.join(location, '../../models/gap-clas/CNN-CG'))
# Sliding-window (height, width) in pixels for the CNN model.
CNN_slider = (60, 30)
# Bidirectional RNN gap classifier; 'prediction' is the output tensor name.
RNN_model = Model(
    os.path.join(location, '../../models/gap-clas/RNN/Bi-RNN-new'),
    'prediction')
# Sliding-window (height, width) in pixels for the RNN model.
RNN_slider = (60, 60)


def _classify(img, step=2, RNN=False, slider=(60, 60)):
    """Slide a window across the image and return raw classifier output.

    Args:
        img: Preprocessed grayscale word image as a 2-D array; its height
            is assumed to equal slider[0] — TODO confirm at call sites.
        step: Horizontal stride of the sliding window in pixels.
        RNN: If True, evaluate with the bidirectional RNN model;
            otherwise with the CNN model.
        slider: (height, width) of the sliding window in pixels.

    Returns:
        Raw model predictions, one entry per window position.
    """
    # Number of window positions.  BUG FIX: the stride was hard-coded as
    # `// 2` here, which silently ignored any non-default `step`;
    # behavior is unchanged for the default step=2.
    length = (img.shape[1] - slider[1]) // step + 1
    # One flattened window slice per position, shared by both branches.
    slices = [img[:, loc * step: loc * step + slider[1]].flatten()
              for loc in range(length)]
    if RNN:
        # RNN expects a batch dimension plus an explicit sequence length.
        input_seq = np.zeros((1, length, slider[0] * slider[1]),
                             dtype=np.float32)
        input_seq[0][:] = slices
        pred = RNN_model.eval_feed({'inputs:0': input_seq,
                                    'length:0': [length],
                                    'keep_prob:0': 1})[0]
    else:
        # CNN takes the windows directly as a batch of flat vectors.
        input_seq = np.zeros((length, slider[0] * slider[1]),
                             dtype=np.float32)
        input_seq[:] = slices
        pred = CNN_model.run(input_seq)

    return pred

def segment(img, step=2, RNN=False, debug=False):
    """Take preprocessed image of word and
    return array of x positions separating characters.

    Args:
        img: Preprocessed grayscale word image.
        step: Sliding-window stride in pixels (forwarded to the classifier).
        RNN: If True, use the RNN model and its slider size;
            otherwise the CNN model.
        debug: If True, draw the detected gaps on the image and display it.

    Returns:
        List of integer x coordinates separating characters.
    """
    slider = RNN_slider if RNN else CNN_slider

    # Run the classifier: one prediction per window position.
    pred = _classify(img, step=step, RNN=RNN, slider=slider)

    # Finalize the gap positions from raw prediction.
    # A "gap block" is a run of consecutive windows classified as gap (p==1);
    # each block is collapsed into a single x coordinate.
    gaps = []
    last_gap = 0
    gap_count = 1
    gap_position_sum = slider[1] / 2
    first_gap = True
    gap_block_first = 0
    gap_block_last = slider[1] / 2

    for i, p in enumerate(pred):
        if p == 1:
            # Gap window: accumulate its centre position in pixels.
            gap_position_sum += i * step + slider[1] / 2
            gap_block_last = i * step + slider[1] / 2
            gap_count += 1
            last_gap = 0
            if gap_block_first == 0:
                gap_block_first = i * step + slider[1] / 2
        else:
            # Non-gap window: once the run of gap windows has ended,
            # emit one position for the finished block.
            if gap_count != 0 and last_gap >= 1:
                if first_gap:
                    # First block: use its last position so the leading
                    # margin is not averaged into the gap coordinate.
                    gaps.append(int(gap_block_last))
                    first_gap = False
                else:
                    # Later blocks: average of all accumulated centres.
                    gaps.append(int(gap_position_sum // gap_count))
                gap_position_sum = 0
                gap_count = 0
            gap_block_first = 0
            last_gap += 1

    # Adding final gap position.
    if gap_block_first != 0:
        gaps.append(int(gap_block_first))
    else:
        # BUG FIX: the stride was hard-coded as `* 2` here; use `step`
        # so non-default strides map back to correct pixel coordinates
        # (unchanged for the default step=2).
        gap_position_sum += (len(pred) - 1) * step + slider[1] / 2
        gaps.append(int(gap_position_sum / (gap_count + 1)))

    if debug:
        # Drawing lines over a colour copy of the input for inspection.
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        for gap in gaps:
            cv2.line(img,
                     (int(gap), 0),
                     (int(gap), slider[0]),
                     (0, 255, 0), 1)
        implt(img, t="Separated characters")

    return gaps