mohamed12ahmed committed on
Commit
c7012e2
·
verified ·
1 Parent(s): 90d252f

Upload 22 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ output/cell_pic.jpg filter=lfs diff=lfs merge=lfs -text
37
+ output/chart.JPG filter=lfs diff=lfs merge=lfs -text
38
+ output/desk.JPG filter=lfs diff=lfs merge=lfs -text
39
+ output/dollar_bill.JPG filter=lfs diff=lfs merge=lfs -text
40
+ output/math_cheat_sheet.JPG filter=lfs diff=lfs merge=lfs -text
41
+ output/notepad.JPG filter=lfs diff=lfs merge=lfs -text
42
+ output/receipt.jpg filter=lfs diff=lfs merge=lfs -text
43
+ output/tax.jpeg filter=lfs diff=lfs merge=lfs -text
44
+ sample_images/cell_pic.jpg filter=lfs diff=lfs merge=lfs -text
45
+ sample_images/chart.JPG filter=lfs diff=lfs merge=lfs -text
46
+ sample_images/desk.JPG filter=lfs diff=lfs merge=lfs -text
47
+ sample_images/dollar_bill.JPG filter=lfs diff=lfs merge=lfs -text
48
+ sample_images/math_cheat_sheet.JPG filter=lfs diff=lfs merge=lfs -text
49
+ sample_images/notepad.JPG filter=lfs diff=lfs merge=lfs -text
50
+ sample_images/receipt.jpg filter=lfs diff=lfs merge=lfs -text
51
+ sample_images/tax.jpeg filter=lfs diff=lfs merge=lfs -text
output/cell_pic.jpg ADDED

Git LFS Details

  • SHA256: 1e84a796d9c5eaef08bda0e414eaf62b42ff05305ad1cdbad4767beb0b73ad83
  • Pointer size: 132 Bytes
  • Size of remote file: 1.79 MB
output/chart.JPG ADDED

Git LFS Details

  • SHA256: 03ba16918b8de54c18393eaf88bb4cb714b563b60c36c0e0ea5eb815a556e14d
  • Pointer size: 132 Bytes
  • Size of remote file: 1.06 MB
output/desk.JPG ADDED

Git LFS Details

  • SHA256: 8306ec9783cfe46fc4f6c7bb85005e6a9cc8473726fb004b3c6bdbeb93c1c808
  • Pointer size: 131 Bytes
  • Size of remote file: 929 kB
output/dollar_bill.JPG ADDED

Git LFS Details

  • SHA256: 7b65c2a960b3f048ccbee1331f4d367959484fed5e246dd6a3767001d79e15ec
  • Pointer size: 132 Bytes
  • Size of remote file: 1.29 MB
output/math_cheat_sheet.JPG ADDED

Git LFS Details

  • SHA256: 35439476cf5982b36e4168086a6a63b851c43325032bc1f5332cc5152e427c11
  • Pointer size: 132 Bytes
  • Size of remote file: 1.77 MB
output/notepad.JPG ADDED

Git LFS Details

  • SHA256: bbe590a82129654aee73c3b3613c0fcae789f8be78ab92d8dcb0a4ee8f5443a6
  • Pointer size: 131 Bytes
  • Size of remote file: 744 kB
output/receipt.jpg ADDED

Git LFS Details

  • SHA256: b2fc436260bbc7e0e024a29c10a9bca72c6d3f7528ca1e0eb3c51959ce9933c6
  • Pointer size: 131 Bytes
  • Size of remote file: 478 kB
output/tax.jpeg ADDED

Git LFS Details

  • SHA256: bd2c39e20c82cfa1a4d97dd339fe784be40264704b5f3597268bc5a250c2695e
  • Pointer size: 131 Bytes
  • Size of remote file: 417 kB
polygon_interacter.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from matplotlib.lines import Line2D
3
+ from matplotlib.artist import Artist
4
+
5
+
6
class PolygonInteractor(object):
    """
    A polygon editor.

    Overlays a marker line on the vertices of a polygon and lets the user
    drag a vertex with the left mouse button. A vertex is grabbed when the
    button is pressed within ``epsilon`` display pixels of it.
    """

    showverts = True
    epsilon = 5  # max pixel distance to count as a vertex hit

    def __init__(self, ax, poly):
        """
        Args:
            ax: Axes the vertex-marker line is added to.
            poly: Polygon artist to edit. It must already belong to a figure,
                because the event handlers are attached to its canvas.

        Raises:
            RuntimeError: If ``poly.figure`` is None.
        """
        if poly.figure is None:
            raise RuntimeError('You must first add the polygon to a figure or canvas before defining the interactor')
        self.ax = ax
        canvas = poly.figure.canvas
        self.poly = poly

        # Filled in by draw_callback; stays None until the first draw event
        self.background = None

        x, y = zip(*self.poly.xy)
        self.line = Line2D(x, y, marker='o', markerfacecolor='r', animated=True)
        self.ax.add_line(self.line)

        self.poly.add_callback(self.poly_changed)
        self._ind = None  # the active vert

        canvas.mpl_connect('draw_event', self.draw_callback)
        canvas.mpl_connect('button_press_event', self.button_press_callback)
        canvas.mpl_connect('button_release_event', self.button_release_callback)
        canvas.mpl_connect('motion_notify_event', self.motion_notify_callback)
        self.canvas = canvas

    def get_poly_points(self):
        """Return the polygon's current vertices as a numpy array."""
        return np.asarray(self.poly.xy)

    def draw_callback(self, event):
        """Cache the axes background, then draw the polygon and marker line on top."""
        self.background = self.canvas.copy_from_bbox(self.ax.bbox)
        self.ax.draw_artist(self.poly)
        self.ax.draw_artist(self.line)
        self.canvas.blit(self.ax.bbox)

    def poly_changed(self, poly):
        """Callback registered on the polygon: copy its artist props to the line."""
        # only copy the artist props to the line (except visibility)
        vis = self.line.get_visible()
        Artist.update_from(self.line, poly)
        self.line.set_visible(vis)  # don't use the poly visibility state

    def get_ind_under_point(self, event):
        """
        Return the index of the vertex nearest to the event position, or None
        if that vertex is ``epsilon`` or more display pixels away.
        """
        # display coords
        xy = np.asarray(self.poly.xy)
        xyt = self.poly.get_transform().transform(xy)
        xt, yt = xyt[:, 0], xyt[:, 1]
        d = np.sqrt((xt - event.x)**2 + (yt - event.y)**2)

        # argmin picks the first vertex at the minimum distance
        ind = np.argmin(d)
        if d[ind] >= self.epsilon:
            return None
        return ind

    def button_press_callback(self, event):
        """Grab the vertex under the cursor when the left button is pressed inside an axes."""
        if not self.showverts:
            return
        if event.inaxes is None:
            return
        if event.button != 1:
            return
        self._ind = self.get_ind_under_point(event)

    def button_release_callback(self, event):
        """Release the active vertex when the left button is released."""
        if not self.showverts:
            return
        if event.button != 1:
            return
        self._ind = None

    def motion_notify_callback(self, event):
        """Move the active vertex to the cursor while the left button is held."""
        if not self.showverts:
            return
        if self._ind is None:
            return
        if event.inaxes is None:
            return
        if event.button != 1:
            return
        x, y = event.xdata, event.ydata

        self.poly.xy[self._ind] = x, y
        # The first and last entries are moved together
        # (presumably the last vertex is the polygon's closing duplicate of the first)
        if self._ind == 0:
            self.poly.xy[-1] = x, y
        elif self._ind == len(self.poly.xy) - 1:
            self.poly.xy[0] = x, y
        xs, ys = zip(*self.poly.xy)
        self.line.set_data(xs, ys)

        # Nothing to restore if no draw event has cached a background yet
        if self.background is not None:
            self.canvas.restore_region(self.background)
        self.ax.draw_artist(self.poly)
        self.ax.draw_artist(self.line)
        self.canvas.blit(self.ax.bbox)
pyimagesearch/__init__.py ADDED
File without changes
pyimagesearch/imutils.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Import the necessary packages
2
+ import numpy as np
3
+ import cv2
4
+
5
def translate(image, x, y):
    # Affine matrix for a pure shift of x along the columns and y along the rows
    shift_matrix = np.float32([[1, 0, x], [0, 1, y]])

    # Warp onto a canvas with the same width and height as the input
    canvas_size = (image.shape[1], image.shape[0])
    return cv2.warpAffine(image, shift_matrix, canvas_size)
12
+
13
def rotate(image, angle, center = None, scale = 1.0):
    # Height and width are the first two axes of the image
    h, w = image.shape[:2]

    # Rotate about the middle of the image unless a center was supplied
    pivot = (w / 2, h / 2) if center is None else center

    # Build the rotation matrix and warp onto a canvas of the original size
    rotation = cv2.getRotationMatrix2D(pivot, angle, scale)
    return cv2.warpAffine(image, rotation, (w, h))
28
+
29
def resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    # Current size of the image
    src_h, src_w = image.shape[:2]

    # Nothing requested: hand the image back untouched
    if width is None and height is None:
        return image

    if width is not None:
        # Scale by the requested width; height follows the aspect ratio.
        # When both width and height are given, width takes precedence.
        factor = width / float(src_w)
        target = (width, int(src_h * factor))
    else:
        # Only a height was given: width follows the aspect ratio
        factor = height / float(src_h)
        target = (int(src_w * factor), height)

    # Resize to the computed (width, height)
    return cv2.resize(image, target, interpolation = inter)
pyimagesearch/transform.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import the necessary packages
2
+ from scipy.spatial import distance as dist
3
+ import numpy as np
4
+ import cv2
5
+
6
def order_points(pts):
    # Order the points by x-coordinate, then split them into the two
    # left-most and the remaining right-most points
    by_x = pts[pts[:, 0].argsort()]
    left_pair, right_pair = by_x[:2], by_x[2:]

    # Of the left pair, the smaller y is the top-left point and the
    # larger y is the bottom-left point
    top_left, bottom_left = left_pair[left_pair[:, 1].argsort()]

    # Measure the Euclidean distance from the top-left anchor to each
    # right-most point; the farther one is taken as bottom-right and the
    # nearer one as top-right
    spans = dist.cdist(top_left[np.newaxis], right_pair, "euclidean")[0]
    bottom_right, top_right = right_pair[spans.argsort()[::-1]]

    # Result is in top-left, top-right, bottom-right, bottom-left order
    return np.array([top_left, top_right, bottom_right, bottom_left], dtype = "float32")
32
+
33
def four_point_transform(image, pts):
    # Put the points in top-left, top-right, bottom-right, bottom-left order
    rect = order_points(pts)
    tl, tr, br, bl = rect

    def _edge_length(p, q):
        # Straight-line distance between two corner points
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Output width: the longer of the bottom edge and the top edge,
    # each truncated to an integer
    maxWidth = max(int(_edge_length(br, bl)), int(_edge_length(tr, tl)))

    # Output height: the longer of the right edge and the left edge,
    # each truncated to an integer
    maxHeight = max(int(_edge_length(tr, br)), int(_edge_length(tl, bl)))

    # Destination corners of the top-down view, in the same
    # top-left, top-right, bottom-right, bottom-left order as rect
    right, bottom = maxWidth - 1, maxHeight - 1
    dst = np.array(
        [[0, 0], [right, 0], [right, bottom], [0, bottom]],
        dtype = "float32")

    # Compute the perspective transform matrix and warp the image with it
    matrix = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, matrix, (maxWidth, maxHeight))
requirements.txt CHANGED
Binary files a/requirements.txt and b/requirements.txt differ
 
sample_images/cell_pic.jpg ADDED

Git LFS Details

  • SHA256: 951f33af39289a2b00106fa98e705e3fc8ddfb3a1a3461e1186ef540d15f0c64
  • Pointer size: 132 Bytes
  • Size of remote file: 2.5 MB
sample_images/chart.JPG ADDED

Git LFS Details

  • SHA256: 59cb72bb0585097a0a1cdec1fdd48a300724fe7d1d78d0fd1bd7dae353b0448a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.76 MB
sample_images/desk.JPG ADDED

Git LFS Details

  • SHA256: 471d600535151d97d11cf2b13361bbcc72836d1dcdd10678da3450485e01de81
  • Pointer size: 132 Bytes
  • Size of remote file: 1.67 MB
sample_images/dollar_bill.JPG ADDED

Git LFS Details

  • SHA256: d2d01b392692809c1dcdb7f5a00a3c691c9e59b049faf6b4fa3e68e6a0cb4e5a
  • Pointer size: 132 Bytes
  • Size of remote file: 1.99 MB
sample_images/math_cheat_sheet.JPG ADDED

Git LFS Details

  • SHA256: f9662be68bb73238d9a68564eccdf0a394c3d10e958c853fa2ff84943c0f10c5
  • Pointer size: 132 Bytes
  • Size of remote file: 1.02 MB
sample_images/notepad.JPG ADDED

Git LFS Details

  • SHA256: 4a0a92ffa83fd1a17337b82d1790db36304e8c8080f6b0ef2ef19003d728da1c
  • Pointer size: 132 Bytes
  • Size of remote file: 1.55 MB
sample_images/receipt.jpg ADDED

Git LFS Details

  • SHA256: baff7d961e444164eb1b5680bace59709f584c6dbb5a5ccd44708499396cc325
  • Pointer size: 132 Bytes
  • Size of remote file: 2.15 MB
sample_images/tax.jpeg ADDED

Git LFS Details

  • SHA256: e3f6fbdff423e459d60e50b6ae16989f368caa3ce371a016675cfb35771931d8
  • Pointer size: 131 Bytes
  • Size of remote file: 338 kB
scan.py ADDED
@@ -0,0 +1,333 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # USAGE:
2
+ # python scan.py (--images <IMG_DIR> | --image <IMG_PATH>) [-i]
3
+ # For example, to scan a single image with interactive mode:
4
+ # python scan.py --image sample_images/desk.JPG -i
5
+ # To scan all images in a directory automatically:
6
+ # python scan.py --images sample_images
7
+
8
+ # Scanned images will be output to directory named 'output'
9
+
10
+ from pyimagesearch import transform
11
+ from pyimagesearch import imutils
12
+ from scipy.spatial import distance as dist
13
+ from matplotlib.patches import Polygon
14
+ import polygon_interacter as poly_i
15
+ import numpy as np
16
+ import matplotlib.pyplot as plt
17
+ import itertools
18
+ import math
19
+ import cv2
20
+ from pylsd.lsd import lsd
21
+
22
+ import argparse
23
+ import os
24
+
25
class DocScanner(object):
    """An image scanner: locates a document in a photo, flattens it and saves a thresholded scan."""

    def __init__(self, interactive=False, MIN_QUAD_AREA_RATIO=0.25, MAX_QUAD_ANGLE_RANGE=40):
        """
        Args:
            interactive (boolean): If True, user can adjust screen contour before
                transformation occurs in interactive pyplot window.
            MIN_QUAD_AREA_RATIO (float): A contour will be rejected if its corners
                do not form a quadrilateral that covers at least MIN_QUAD_AREA_RATIO
                of the original image. Defaults to 0.25.
            MAX_QUAD_ANGLE_RANGE (int): A contour will also be rejected if the range
                of its interior angles exceeds MAX_QUAD_ANGLE_RANGE. Defaults to 40.
        """
        self.interactive = interactive
        self.MIN_QUAD_AREA_RATIO = MIN_QUAD_AREA_RATIO
        self.MAX_QUAD_ANGLE_RANGE = MAX_QUAD_ANGLE_RANGE

    def filter_corners(self, corners, min_dist=20):
        """
        Filters corners that are within min_dist of others.

        Corners are visited in order; a corner is kept only if it is at least
        min_dist away from every corner kept so far.
        """
        filtered_corners = []
        for c in corners:
            if all(dist.euclidean(kept, c) >= min_dist for kept in filtered_corners):
                filtered_corners.append(c)
        return filtered_corners

    def angle_between_vectors_degrees(self, u, v):
        """Returns the angle between two vectors in degrees"""
        cosine = np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))
        # Rounding can leave the cosine of (anti)parallel vectors marginally
        # outside [-1, 1], which would make math.acos raise ValueError
        cosine = np.clip(cosine, -1.0, 1.0)
        return np.degrees(math.acos(cosine))

    def get_angle(self, p1, p2, p3):
        """
        Returns the angle between the line segment from p2 to p1
        and the line segment from p2 to p3 in degrees
        """
        a = np.asarray(p1, dtype=float)
        b = np.asarray(p2, dtype=float)
        c = np.asarray(p3, dtype=float)

        return self.angle_between_vectors_degrees(a - b, c - b)

    def angle_range(self, quad):
        """
        Returns the range between max and min interior angles of quadrilateral.
        The input quadrilateral must be a numpy array with vertices ordered clockwise
        starting with the top left vertex. Each vertex is wrapped in an extra
        axis (i.e. quad[i][0] is the (x, y) pair).
        """
        tl, tr, br, bl = quad
        ura = self.get_angle(tl[0], tr[0], br[0])
        ula = self.get_angle(bl[0], tl[0], tr[0])
        lra = self.get_angle(tr[0], br[0], bl[0])
        lla = self.get_angle(br[0], bl[0], tl[0])

        return np.ptp([ura, ula, lra, lla])

    def get_corners(self, img):
        """
        Returns a list of corners ((x, y) tuples) found in the input image. With proper
        pre-processing and filtering, it should output at most 10 potential corners.
        This is a utility function used by get_contour. The input image is expected
        to be rescaled and Canny filtered prior to be passed in.
        """
        lines = lsd(img)

        # massages the output from LSD
        # LSD operates on edges. One "line" has 2 edges, and so we need to combine the edges back into lines
        # 1. separate out the lines into horizontal and vertical lines.
        # 2. Draw the horizontal lines back onto a canvas, but slightly thicker and longer.
        # 3. Run connected-components on the new canvas
        # 4. Get the bounding box for each component, and the bounding box is final line.
        # 5. The ends of each line is a corner
        # 6. Repeat for vertical lines
        # 7. Draw all the final lines onto another canvas. Where the lines overlap are also corners

        corners = []
        if lines is not None and len(lines):
            # separate out the horizontal and vertical lines, and draw them back onto separate canvases.
            # atleast_2d keeps a single detected segment as one row; squeeze alone
            # would collapse it to a flat list and break the unpacking below
            lines = np.atleast_2d(lines.squeeze()).astype(np.int32).tolist()
            horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            for line in lines:
                x1, y1, x2, y2, _ = line
                if abs(x2 - x1) > abs(y2 - y1):
                    (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[0])
                    cv2.line(horizontal_lines_canvas, (max(x1 - 5, 0), y1), (min(x2 + 5, img.shape[1] - 1), y2), 255, 2)
                else:
                    (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[1])
                    cv2.line(vertical_lines_canvas, (x1, max(y1 - 5, 0)), (x2, min(y2 + 5, img.shape[0] - 1)), 255, 2)

            # find the horizontal lines (connected-components -> bounding boxes -> final lines).
            # [-2] selects the contour list whether findContours returns 2 or 3 values
            contours = cv2.findContours(horizontal_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
            contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]
            horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            for contour in contours:
                contour = contour.reshape((contour.shape[0], contour.shape[2]))
                # plain ints keep numpy scalar types out of the drawing call and the corner tuples
                min_x = int(np.amin(contour[:, 0], axis=0)) + 2
                max_x = int(np.amax(contour[:, 0], axis=0)) - 2
                left_y = int(np.average(contour[contour[:, 0] == min_x][:, 1]))
                right_y = int(np.average(contour[contour[:, 0] == max_x][:, 1]))
                # drawn with value 1 so that an overlap with a vertical line sums to 2
                cv2.line(horizontal_lines_canvas, (min_x, left_y), (max_x, right_y), 1, 1)
                corners.append((min_x, left_y))
                corners.append((max_x, right_y))

            # find the vertical lines (connected-components -> bounding boxes -> final lines)
            contours = cv2.findContours(vertical_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2]
            contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]
            vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            for contour in contours:
                contour = contour.reshape((contour.shape[0], contour.shape[2]))
                min_y = int(np.amin(contour[:, 1], axis=0)) + 2
                max_y = int(np.amax(contour[:, 1], axis=0)) - 2
                top_x = int(np.average(contour[contour[:, 1] == min_y][:, 0]))
                bottom_x = int(np.average(contour[contour[:, 1] == max_y][:, 0]))
                cv2.line(vertical_lines_canvas, (top_x, min_y), (bottom_x, max_y), 1, 1)
                corners.append((top_x, min_y))
                corners.append((bottom_x, max_y))

            # find the corners: pixels where a horizontal and a vertical final line overlap
            corners_y, corners_x = np.where(horizontal_lines_canvas + vertical_lines_canvas == 2)
            corners += zip(corners_x, corners_y)

        # remove corners in close proximity
        corners = self.filter_corners(corners)
        return corners

    def is_valid_contour(self, cnt, IM_WIDTH, IM_HEIGHT):
        """
        Returns True if the contour satisfies all requirements set at instantiation:
        exactly four points, area above MIN_QUAD_AREA_RATIO of the image, and
        interior angle range below MAX_QUAD_ANGLE_RANGE.
        """
        return (len(cnt) == 4 and cv2.contourArea(cnt) > IM_WIDTH * IM_HEIGHT * self.MIN_QUAD_AREA_RATIO
            and self.angle_range(cnt) < self.MAX_QUAD_ANGLE_RANGE)

    def get_contour(self, rescaled_image):
        """
        Returns a numpy array of shape (4, 2) containing the vertices of the four corners
        of the document in the image. It considers the corners returned from get_corners()
        and uses heuristics to choose the four corners that most likely represent
        the corners of the document. Contours found directly in the edge map are
        considered as well. If no candidate passes is_valid_contour(), the four
        corners of the whole image are returned.
        """

        # these constants are carefully chosen
        MORPH = 9
        CANNY = 84

        IM_HEIGHT, IM_WIDTH, _ = rescaled_image.shape

        # convert the image to grayscale and blur it slightly
        gray = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (7,7), 0)

        # morphological closing helps to remove potential holes between edge segments
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(MORPH,MORPH))
        dilated = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)

        # find edges and mark them in the output map using the Canny algorithm
        edged = cv2.Canny(dilated, 0, CANNY)
        test_corners = self.get_corners(edged)

        approx_contours = []

        if len(test_corners) >= 4:
            quads = []

            for quad in itertools.combinations(test_corners, 4):
                points = np.array(quad)
                points = transform.order_points(points)
                points = np.array([[p] for p in points], dtype = "int32")
                quads.append(points)

            # get top five quadrilaterals by area
            quads = sorted(quads, key=cv2.contourArea, reverse=True)[:5]
            # sort candidate quadrilaterals by their angle range, which helps remove outliers
            quads = sorted(quads, key=self.angle_range)

            approx = quads[0]
            if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
                approx_contours.append(approx)

            # for debugging: uncomment the code below to draw the corners and contour found
            # by get_corners() and overlay it on the image

            # cv2.drawContours(rescaled_image, [approx], -1, (20, 20, 255), 2)
            # plt.scatter(*zip(*test_corners))
            # plt.imshow(rescaled_image)
            # plt.show()

        # also attempt to find contours directly from the edged image, which occasionally
        # produces better results. [-2] selects the contour list whether findContours
        # returns 2 or 3 values
        cnts = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

        # loop over the contours
        for c in cnts:
            # approximate the contour
            approx = cv2.approxPolyDP(c, 80, True)
            if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
                approx_contours.append(approx)
                break

        # If we did not find any valid contours, just use the whole image
        if not approx_contours:
            TOP_RIGHT = (IM_WIDTH, 0)
            BOTTOM_RIGHT = (IM_WIDTH, IM_HEIGHT)
            BOTTOM_LEFT = (0, IM_HEIGHT)
            TOP_LEFT = (0, 0)
            screenCnt = np.array([[TOP_RIGHT], [BOTTOM_RIGHT], [BOTTOM_LEFT], [TOP_LEFT]])

        else:
            screenCnt = max(approx_contours, key=cv2.contourArea)

        return screenCnt.reshape(4, 2)

    def interactive_get_contour(self, screenCnt, rescaled_image):
        """
        Shows the image with the proposed contour in a pyplot window, lets the
        user drag its corners, and returns the adjusted corners as an int32
        array of shape (4, 2) once the window is closed.
        """
        poly = Polygon(screenCnt, animated=True, fill=False, color="yellow", linewidth=5)
        fig, ax = plt.subplots()
        ax.add_patch(poly)
        ax.set_title(('Drag the corners of the box to the corners of the document. \n'
            'Close the window when finished.'))
        p = poly_i.PolygonInteractor(ax, poly)
        plt.imshow(rescaled_image)
        plt.show()

        # Only the first four vertices are used; coordinates are truncated to int32
        new_points = p.get_poly_points()[:4]
        return np.array(new_points, dtype = "int32").reshape(4, 2)

    def scan(self, image_path):
        """
        Scans the image at image_path and writes the result, under the same
        file name, into the 'output' directory (created if missing).

        Raises:
            AssertionError: If the image cannot be read.
        """

        RESCALED_HEIGHT = 500.0
        OUTPUT_DIR = 'output'

        # load the image and compute the ratio of the old height
        # to the new height, clone it, and resize it
        image = cv2.imread(image_path)

        # Raised explicitly (rather than via `assert`) so the check survives
        # `python -O`; the exception type is kept for existing callers
        if image is None:
            raise AssertionError("Could not read image: " + str(image_path))

        ratio = image.shape[0] / RESCALED_HEIGHT
        orig = image.copy()
        rescaled_image = imutils.resize(image, height = int(RESCALED_HEIGHT))

        # get the contour of the document
        screenCnt = self.get_contour(rescaled_image)

        if self.interactive:
            screenCnt = self.interactive_get_contour(screenCnt, rescaled_image)

        # apply the perspective transformation; the contour was found on the
        # rescaled image, so scale it back up to the original resolution
        warped = transform.four_point_transform(orig, screenCnt * ratio)

        # convert the warped image to grayscale
        gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)

        # sharpen image
        sharpen = cv2.GaussianBlur(gray, (0,0), 3)
        sharpen = cv2.addWeighted(gray, 1.5, sharpen, -0.5, 0)

        # apply adaptive threshold to get black and white effect
        thresh = cv2.adaptiveThreshold(sharpen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 15)

        # save the transformed image; make sure the output directory exists
        # first so the write is not silently dropped
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        basename = os.path.basename(image_path)
        cv2.imwrite(os.path.join(OUTPUT_DIR, basename), thresh)
        print("Proccessed " + basename)
304
+
305
+
306
if __name__ == "__main__":
    # Exactly one of --images / --image must be supplied; -i is optional
    parser = argparse.ArgumentParser()
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--images", help="Directory of images to be scanned")
    source.add_argument("--image", help="Path to single image to be scanned")
    parser.add_argument("-i", action='store_true',
        help = "Flag for manually verifying and/or setting document corners")

    options = vars(parser.parse_args())
    image_dir = options["images"]
    single_image = options["image"]

    doc_scanner = DocScanner(options["i"])

    # File extensions (lower-cased) that are treated as scannable images
    valid_formats = [".jpg", ".jpeg", ".jp2", ".png", ".bmp", ".tiff", ".tif"]

    def get_ext(f):
        # Lower-cased extension of a file name, including the leading dot
        return os.path.splitext(f)[1].lower()

    if single_image:
        # Scan single image specified by command line argument --image <IMAGE_PATH>
        doc_scanner.scan(single_image)
    else:
        # Scan all valid images in directory specified by command line argument --images <IMAGE_DIR>
        for entry in os.listdir(image_dir):
            if get_ext(entry) not in valid_formats:
                continue
            doc_scanner.scan("/".join((image_dir, entry)))