Spaces:
Sleeping
Sleeping
Upload 22 files
Browse files- .gitattributes +16 -0
- output/cell_pic.jpg +3 -0
- output/chart.JPG +3 -0
- output/desk.JPG +3 -0
- output/dollar_bill.JPG +3 -0
- output/math_cheat_sheet.JPG +3 -0
- output/notepad.JPG +3 -0
- output/receipt.jpg +3 -0
- output/tax.jpeg +3 -0
- polygon_interacter.py +106 -0
- pyimagesearch/__init__.py +0 -0
- pyimagesearch/imutils.py +58 -0
- pyimagesearch/transform.py +69 -0
- requirements.txt +0 -0
- sample_images/cell_pic.jpg +3 -0
- sample_images/chart.JPG +3 -0
- sample_images/desk.JPG +3 -0
- sample_images/dollar_bill.JPG +3 -0
- sample_images/math_cheat_sheet.JPG +3 -0
- sample_images/notepad.JPG +3 -0
- sample_images/receipt.jpg +3 -0
- sample_images/tax.jpeg +3 -0
- scan.py +333 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
output/cell_pic.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
output/chart.JPG filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
output/desk.JPG filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
output/dollar_bill.JPG filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
output/math_cheat_sheet.JPG filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
output/notepad.JPG filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
output/receipt.jpg filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
output/tax.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
sample_images/cell_pic.jpg filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
sample_images/chart.JPG filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
sample_images/desk.JPG filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
sample_images/dollar_bill.JPG filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
sample_images/math_cheat_sheet.JPG filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
sample_images/notepad.JPG filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
sample_images/receipt.jpg filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
sample_images/tax.jpeg filter=lfs diff=lfs merge=lfs -text
|
output/cell_pic.jpg
ADDED
|
Git LFS Details
|
output/chart.JPG
ADDED
|
|
Git LFS Details
|
output/desk.JPG
ADDED
|
|
Git LFS Details
|
output/dollar_bill.JPG
ADDED
|
|
Git LFS Details
|
output/math_cheat_sheet.JPG
ADDED
|
|
Git LFS Details
|
output/notepad.JPG
ADDED
|
|
Git LFS Details
|
output/receipt.jpg
ADDED
|
Git LFS Details
|
output/tax.jpeg
ADDED
|
Git LFS Details
|
polygon_interacter.py
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from matplotlib.lines import Line2D
|
| 3 |
+
from matplotlib.artist import Artist
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class PolygonInteractor(object):
    """Interactive editor for a matplotlib Polygon patch.

    Attaches to an existing ``Polygon`` (which must already be added to a
    figure) and lets the user drag individual vertices with the left mouse
    button.  Redraws use blitting (``copy_from_bbox`` / ``restore_region`` /
    ``blit``) so only the polygon and its marker line are repainted.
    """

    # showverts: when False, all mouse interaction is ignored.
    showverts = True
    epsilon = 5  # max pixel distance to count as a vertex hit

    def __init__(self, ax, poly):
        """Bind the interactor to axes *ax* and polygon *poly*.

        Raises:
            RuntimeError: if *poly* has not been added to a figure yet
                (its ``figure`` attribute is None), since event hookup
                needs the figure's canvas.
        """
        if poly.figure is None:
            raise RuntimeError('You must first add the polygon to a figure or canvas before defining the interactor')
        self.ax = ax
        canvas = poly.figure.canvas
        self.poly = poly

        # Overlay a marker line on the polygon's vertices so the user can
        # see (and grab) each draggable corner.
        x, y = zip(*self.poly.xy)
        self.line = Line2D(x, y, marker='o', markerfacecolor='r', animated=True)
        self.ax.add_line(self.line)

        cid = self.poly.add_callback(self.poly_changed)
        self._ind = None  # the active vert (index currently being dragged)

        canvas.mpl_connect('draw_event', self.draw_callback)
        canvas.mpl_connect('button_press_event', self.button_press_callback)
        canvas.mpl_connect('button_release_event', self.button_release_callback)
        canvas.mpl_connect('motion_notify_event', self.motion_notify_callback)
        self.canvas = canvas

    def get_poly_points(self):
        """Return the polygon's current vertices as a numpy array."""
        return np.asarray(self.poly.xy)

    def draw_callback(self, event):
        """On a full redraw, cache the background for later blitting."""
        self.background = self.canvas.copy_from_bbox(self.ax.bbox)
        self.ax.draw_artist(self.poly)
        self.ax.draw_artist(self.line)
        self.canvas.blit(self.ax.bbox)

    def poly_changed(self, poly):
        """Called whenever the polygon artist is updated; mirror its style."""
        # only copy the artist props to the line (except visibility)
        vis = self.line.get_visible()
        Artist.update_from(self.line, poly)
        self.line.set_visible(vis)  # don't use the poly visibility state

    def get_ind_under_point(self, event):
        """Return the index of the vertex under *event*, or None.

        A vertex counts as "under" the cursor only when its display-space
        distance is strictly less than ``self.epsilon`` pixels.
        """
        # display coords
        xy = np.asarray(self.poly.xy)
        xyt = self.poly.get_transform().transform(xy)
        xt, yt = xyt[:, 0], xyt[:, 1]
        d = np.sqrt((xt - event.x)**2 + (yt - event.y)**2)
        indseq = np.nonzero(np.equal(d, np.amin(d)))[0]
        ind = indseq[0]

        if d[ind] >= self.epsilon:
            ind = None

        return ind

    def button_press_callback(self, event):
        """On left-button press, remember which vertex (if any) was grabbed."""
        if not self.showverts:
            return
        if event.inaxes is None:
            return
        if event.button != 1:
            return
        self._ind = self.get_ind_under_point(event)

    def button_release_callback(self, event):
        """On left-button release, drop the active vertex."""
        if not self.showverts:
            return
        if event.button != 1:
            return
        self._ind = None

    def motion_notify_callback(self, event):
        """While dragging, move the active vertex and blit the update."""
        if not self.showverts:
            return
        if self._ind is None:
            return
        if event.inaxes is None:
            return
        if event.button != 1:
            return
        x, y = event.xdata, event.ydata

        self.poly.xy[self._ind] = x, y
        # The polygon stores a closed path: first and last vertices coincide,
        # so keep them in sync when either endpoint is dragged.
        if self._ind == 0:
            self.poly.xy[-1] = x, y
        elif self._ind == len(self.poly.xy) - 1:
            self.poly.xy[0] = x, y
        # NOTE(review): passing a zip object to set_data relies on matplotlib
        # unpacking it as (xs, ys) — confirm against the pinned matplotlib version.
        self.line.set_data(zip(*self.poly.xy))

        self.canvas.restore_region(self.background)
        self.ax.draw_artist(self.poly)
        self.ax.draw_artist(self.line)
        self.canvas.blit(self.ax.bbox)
pyimagesearch/__init__.py
ADDED
|
File without changes
|
pyimagesearch/imutils.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Import the necessary packages
|
| 2 |
+
import numpy as np
|
| 3 |
+
import cv2
|
| 4 |
+
|
| 5 |
+
def translate(image, x, y):
    """Shift *image* by (x, y) pixels and return the shifted copy.

    Positive x moves the content right; positive y moves it down.
    The output has the same size as the input.
    """
    # A pure translation is an affine warp with identity rotation part
    # and (x, y) as the offset column.
    translation_matrix = np.float32([[1, 0, x], [0, 1, y]])
    rows, cols = image.shape[0], image.shape[1]
    return cv2.warpAffine(image, translation_matrix, (cols, rows))
| 13 |
+
def rotate(image, angle, center = None, scale = 1.0):
    """Rotate *image* by *angle* degrees about *center*, scaling by *scale*.

    When *center* is None the pivot defaults to the image's geometric
    center.  The output canvas keeps the original width and height.
    """
    (h, w) = image.shape[:2]

    # Default pivot: the middle of the image.
    if center is None:
        center = (w / 2, h / 2)

    # Build the 2x3 rotation matrix and apply it.
    rotation_matrix = cv2.getRotationMatrix2D(center, angle, scale)
    return cv2.warpAffine(image, rotation_matrix, (w, h))
| 29 |
+
def resize(image, width = None, height = None, inter = cv2.INTER_AREA):
    """Resize *image* to the given width or height, preserving aspect ratio.

    Exactly one target dimension is honored: if *width* is given it wins,
    otherwise *height* is used.  With neither given, the original image is
    returned unchanged.
    """
    (h, w) = image.shape[:2]

    # Nothing requested: hand back the original untouched.
    if width is None and height is None:
        return image

    if width is None:
        # Scale factor derived from the requested height.
        ratio = height / float(h)
        target = (int(w * ratio), height)
    else:
        # Scale factor derived from the requested width.
        ratio = width / float(w)
        target = (width, int(h * ratio))

    return cv2.resize(image, target, interpolation = inter)
pyimagesearch/transform.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# import the necessary packages
|
| 2 |
+
from scipy.spatial import distance as dist
|
| 3 |
+
import numpy as np
|
| 4 |
+
import cv2
|
| 5 |
+
|
| 6 |
+
def order_points(pts):
    """Return the four points in *pts* ordered tl, tr, br, bl.

    Args:
        pts: array-like of shape (4, 2) holding the corner coordinates
            in arbitrary order.

    Returns:
        np.ndarray of shape (4, 2), dtype float32, ordered top-left,
        top-right, bottom-right, bottom-left.
    """
    # Rank the points by x-coordinate: the two smallest form the left pair,
    # the two largest the right pair.
    by_x = pts[np.argsort(pts[:, 0]), :]
    left_pair = by_x[:2, :]
    right_pair = by_x[2:, :]

    # Within the left pair, the smaller y is the top-left corner and the
    # larger y the bottom-left.
    left_pair = left_pair[np.argsort(left_pair[:, 1]), :]
    (tl, bl) = left_pair

    # With the top-left anchored, the right-most point farthest from it
    # (Pythagorean theorem) must be the bottom-right corner; the remaining
    # one is the top-right.
    distances = dist.cdist(tl[np.newaxis], right_pair, "euclidean")[0]
    (br, tr) = right_pair[np.argsort(distances)[::-1], :]

    return np.array([tl, tr, br, bl], dtype = "float32")
|
| 33 |
+
def four_point_transform(image, pts):
    """Warp the quadrilateral *pts* in *image* to a flat, top-down rectangle.

    Args:
        image: source image (numpy array as produced by cv2.imread).
        pts: four corner points of the region, in any order.

    Returns:
        The perspective-corrected ("birds eye view") crop of the region.
    """
    # Canonicalize the corner order: top-left, top-right, bottom-right,
    # bottom-left.
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Output width: the longer of the two horizontal edges of the quad.
    width_bottom = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    width_top = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(width_bottom), int(width_top))

    # Output height: the longer of the two vertical edges of the quad.
    height_right = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    height_left = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(height_right), int(height_left))

    # Destination corners of the flattened rectangle, in the same
    # tl, tr, br, bl order as `rect`.
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype = "float32")

    # Solve for the homography mapping rect -> dst and warp the image.
    M = cv2.getPerspectiveTransform(rect, dst)
    return cv2.warpPerspective(image, M, (maxWidth, maxHeight))
requirements.txt
CHANGED
|
Binary files a/requirements.txt and b/requirements.txt differ
|
|
|
sample_images/cell_pic.jpg
ADDED
|
Git LFS Details
|
sample_images/chart.JPG
ADDED
|
|
Git LFS Details
|
sample_images/desk.JPG
ADDED
|
|
Git LFS Details
|
sample_images/dollar_bill.JPG
ADDED
|
|
Git LFS Details
|
sample_images/math_cheat_sheet.JPG
ADDED
|
|
Git LFS Details
|
sample_images/notepad.JPG
ADDED
|
|
Git LFS Details
|
sample_images/receipt.jpg
ADDED
|
Git LFS Details
|
sample_images/tax.jpeg
ADDED
|
Git LFS Details
|
scan.py
ADDED
|
@@ -0,0 +1,333 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# USAGE:
|
| 2 |
+
# python scan.py (--images <IMG_DIR> | --image <IMG_PATH>) [-i]
|
| 3 |
+
# For example, to scan a single image with interactive mode:
|
| 4 |
+
# python scan.py --image sample_images/desk.JPG -i
|
| 5 |
+
# To scan all images in a directory automatically:
|
| 6 |
+
# python scan.py --images sample_images
|
| 7 |
+
|
| 8 |
+
# Scanned images will be output to directory named 'output'
|
| 9 |
+
|
| 10 |
+
from pyimagesearch import transform
|
| 11 |
+
from pyimagesearch import imutils
|
| 12 |
+
from scipy.spatial import distance as dist
|
| 13 |
+
from matplotlib.patches import Polygon
|
| 14 |
+
import polygon_interacter as poly_i
|
| 15 |
+
import numpy as np
|
| 16 |
+
import matplotlib.pyplot as plt
|
| 17 |
+
import itertools
|
| 18 |
+
import math
|
| 19 |
+
import cv2
|
| 20 |
+
from pylsd.lsd import lsd
|
| 21 |
+
|
| 22 |
+
import argparse
|
| 23 |
+
import os
|
| 24 |
+
|
| 25 |
+
class DocScanner(object):
    """An image scanner: locates a document in a photo and flattens it."""

    def __init__(self, interactive=False, MIN_QUAD_AREA_RATIO=0.25, MAX_QUAD_ANGLE_RANGE=40):
        """
        Args:
            interactive (boolean): If True, user can adjust screen contour before
                transformation occurs in interactive pyplot window.
            MIN_QUAD_AREA_RATIO (float): A contour will be rejected if its corners
                do not form a quadrilateral that covers at least MIN_QUAD_AREA_RATIO
                of the original image. Defaults to 0.25.
            MAX_QUAD_ANGLE_RANGE (int): A contour will also be rejected if the range
                of its interior angles exceeds MAX_QUAD_ANGLE_RANGE. Defaults to 40.
        """
        self.interactive = interactive
        self.MIN_QUAD_AREA_RATIO = MIN_QUAD_AREA_RATIO
        self.MAX_QUAD_ANGLE_RANGE = MAX_QUAD_ANGLE_RANGE

    def filter_corners(self, corners, min_dist=20):
        """Filters corners that are within min_dist of others."""
        def predicate(representatives, corner):
            # keep a corner only if it is at least min_dist from every
            # corner already accepted
            return all(dist.euclidean(representative, corner) >= min_dist
                for representative in representatives)

        filtered_corners = []
        for c in corners:
            if predicate(filtered_corners, c):
                filtered_corners.append(c)
        return filtered_corners

    def angle_between_vectors_degrees(self, u, v):
        """Returns the angle between two vectors in degrees."""
        return np.degrees(
            math.acos(np.dot(u, v) / (np.linalg.norm(u) * np.linalg.norm(v))))

    def get_angle(self, p1, p2, p3):
        """
        Returns the angle between the line segment from p2 to p1
        and the line segment from p2 to p3 in degrees.
        """
        a = np.radians(np.array(p1))
        b = np.radians(np.array(p2))
        c = np.radians(np.array(p3))

        avec = a - b
        cvec = c - b

        return self.angle_between_vectors_degrees(avec, cvec)

    def angle_range(self, quad):
        """
        Returns the range between max and min interior angles of quadrilateral.
        The input quadrilateral must be a numpy array with vertices ordered clockwise
        starting with the top left vertex.
        """
        tl, tr, br, bl = quad
        ura = self.get_angle(tl[0], tr[0], br[0])
        ula = self.get_angle(bl[0], tl[0], tr[0])
        lra = self.get_angle(tr[0], br[0], bl[0])
        lla = self.get_angle(br[0], bl[0], tl[0])

        angles = [ura, ula, lra, lla]
        # np.ptp = max - min over the four interior angles
        return np.ptp(angles)

    def get_corners(self, img):
        """
        Returns a list of corners ((x, y) tuples) found in the input image. With proper
        pre-processing and filtering, it should output at most 10 potential corners.
        This is a utility function used by get_contour. The input image is expected
        to be rescaled and Canny filtered prior to being passed in.
        """
        lines = lsd(img)

        # massages the output from LSD
        # LSD operates on edges. One "line" has 2 edges, and so we need to combine the edges back into lines
        # 1. separate out the lines into horizontal and vertical lines.
        # 2. Draw the horizontal lines back onto a canvas, but slightly thicker and longer.
        # 3. Run connected-components on the new canvas
        # 4. Get the bounding box for each component, and the bounding box is final line.
        # 5. The ends of each line is a corner
        # 6. Repeat for vertical lines
        # 7. Draw all the final lines onto another canvas. Where the lines overlap are also corners

        corners = []
        if lines is not None:
            # separate out the horizontal and vertical lines, and draw them back onto separate canvases
            lines = lines.squeeze().astype(np.int32).tolist()
            horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            for line in lines:
                x1, y1, x2, y2, _ = line
                if abs(x2 - x1) > abs(y2 - y1):
                    # mostly-horizontal segment: order endpoints left-to-right
                    # and extend 5px past each end (clamped to the image)
                    (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[0])
                    cv2.line(horizontal_lines_canvas, (max(x1 - 5, 0), y1), (min(x2 + 5, img.shape[1] - 1), y2), 255, 2)
                else:
                    # mostly-vertical segment: order endpoints top-to-bottom
                    (x1, y1), (x2, y2) = sorted(((x1, y1), (x2, y2)), key=lambda pt: pt[1])
                    cv2.line(vertical_lines_canvas, (x1, max(y1 - 5, 0)), (x2, min(y2 + 5, img.shape[0] - 1)), 255, 2)

            lines = []

            # find the horizontal lines (connected-components -> bounding boxes -> final lines)
            (contours, hierarchy) = cv2.findContours(horizontal_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]
            horizontal_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            for contour in contours:
                contour = contour.reshape((contour.shape[0], contour.shape[2]))
                min_x = np.amin(contour[:, 0], axis=0) + 2
                max_x = np.amax(contour[:, 0], axis=0) - 2
                left_y = int(np.average(contour[contour[:, 0] == min_x][:, 1]))
                right_y = int(np.average(contour[contour[:, 0] == max_x][:, 1]))
                lines.append((min_x, left_y, max_x, right_y))
                # draw with value 1 so overlaps with vertical lines sum to 2 below
                cv2.line(horizontal_lines_canvas, (min_x, left_y), (max_x, right_y), 1, 1)
                corners.append((min_x, left_y))
                corners.append((max_x, right_y))

            # find the vertical lines (connected-components -> bounding boxes -> final lines)
            (contours, hierarchy) = cv2.findContours(vertical_lines_canvas, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
            contours = sorted(contours, key=lambda c: cv2.arcLength(c, True), reverse=True)[:2]
            vertical_lines_canvas = np.zeros(img.shape, dtype=np.uint8)
            for contour in contours:
                contour = contour.reshape((contour.shape[0], contour.shape[2]))
                min_y = np.amin(contour[:, 1], axis=0) + 2
                max_y = np.amax(contour[:, 1], axis=0) - 2
                top_x = int(np.average(contour[contour[:, 1] == min_y][:, 0]))
                bottom_x = int(np.average(contour[contour[:, 1] == max_y][:, 0]))
                lines.append((top_x, min_y, bottom_x, max_y))
                cv2.line(vertical_lines_canvas, (top_x, min_y), (bottom_x, max_y), 1, 1)
                corners.append((top_x, min_y))
                corners.append((bottom_x, max_y))

            # find the corners: pixels where a horizontal and a vertical line cross
            corners_y, corners_x = np.where(horizontal_lines_canvas + vertical_lines_canvas == 2)
            corners += zip(corners_x, corners_y)

        # remove corners in close proximity
        corners = self.filter_corners(corners)
        return corners

    def is_valid_contour(self, cnt, IM_WIDTH, IM_HEIGHT):
        """Returns True if the contour satisfies all requirements set at instantiation."""

        return (len(cnt) == 4 and cv2.contourArea(cnt) > IM_WIDTH * IM_HEIGHT * self.MIN_QUAD_AREA_RATIO
            and self.angle_range(cnt) < self.MAX_QUAD_ANGLE_RANGE)


    def get_contour(self, rescaled_image):
        """
        Returns a numpy array of shape (4, 2) containing the vertices of the four corners
        of the document in the image. It considers the corners returned from get_corners()
        and uses heuristics to choose the four corners that most likely represent
        the corners of the document. If no corners were found, or the four corners represent
        a quadrilateral that is too small or convex, it returns the original four corners.
        """

        # these constants are carefully chosen
        MORPH = 9
        CANNY = 84
        HOUGH = 25

        IM_HEIGHT, IM_WIDTH, _ = rescaled_image.shape

        # convert the image to grayscale and blur it slightly
        gray = cv2.cvtColor(rescaled_image, cv2.COLOR_BGR2GRAY)
        gray = cv2.GaussianBlur(gray, (7,7), 0)

        # dilate helps to remove potential holes between edge segments
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT,(MORPH,MORPH))
        dilated = cv2.morphologyEx(gray, cv2.MORPH_CLOSE, kernel)

        # find edges and mark them in the output map using the Canny algorithm
        edged = cv2.Canny(dilated, 0, CANNY)
        test_corners = self.get_corners(edged)

        approx_contours = []

        if len(test_corners) >= 4:
            quads = []

            # every 4-corner combination is a candidate quadrilateral
            for quad in itertools.combinations(test_corners, 4):
                points = np.array(quad)
                points = transform.order_points(points)
                points = np.array([[p] for p in points], dtype = "int32")
                quads.append(points)

            # get top five quadrilaterals by area
            quads = sorted(quads, key=cv2.contourArea, reverse=True)[:5]
            # sort candidate quadrilaterals by their angle range, which helps remove outliers
            quads = sorted(quads, key=self.angle_range)

            approx = quads[0]
            if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
                approx_contours.append(approx)

            # for debugging: uncomment the code below to draw the corners and countour found
            # by get_corners() and overlay it on the image

            # cv2.drawContours(rescaled_image, [approx], -1, (20, 20, 255), 2)
            # plt.scatter(*zip(*test_corners))
            # plt.imshow(rescaled_image)
            # plt.show()

        # also attempt to find contours directly from the edged image, which occasionally
        # produces better results
        (cnts, hierarchy) = cv2.findContours(edged.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]

        # loop over the contours
        for c in cnts:
            # approximate the contour
            approx = cv2.approxPolyDP(c, 80, True)
            if self.is_valid_contour(approx, IM_WIDTH, IM_HEIGHT):
                approx_contours.append(approx)
                break

        # If we did not find any valid contours, just use the whole image
        if not approx_contours:
            TOP_RIGHT = (IM_WIDTH, 0)
            BOTTOM_RIGHT = (IM_WIDTH, IM_HEIGHT)
            BOTTOM_LEFT = (0, IM_HEIGHT)
            TOP_LEFT = (0, 0)
            screenCnt = np.array([[TOP_RIGHT], [BOTTOM_RIGHT], [BOTTOM_LEFT], [TOP_LEFT]])

        else:
            screenCnt = max(approx_contours, key=cv2.contourArea)

        return screenCnt.reshape(4, 2)

    def interactive_get_contour(self, screenCnt, rescaled_image):
        """Let the user drag the detected corners in a pyplot window.

        Blocks until the window is closed, then returns the (possibly
        adjusted) four corner points as an int32 array of shape (4, 2).
        """
        poly = Polygon(screenCnt, animated=True, fill=False, color="yellow", linewidth=5)
        fig, ax = plt.subplots()
        ax.add_patch(poly)
        ax.set_title(('Drag the corners of the box to the corners of the document. \n'
            'Close the window when finished.'))
        p = poly_i.PolygonInteractor(ax, poly)
        plt.imshow(rescaled_image)
        plt.show()

        # the Polygon path is closed, so only the first 4 points are vertices
        new_points = p.get_poly_points()[:4]
        new_points = np.array([[p] for p in new_points], dtype = "int32")
        return new_points.reshape(4, 2)

    def scan(self, image_path):
        """Scan the image at *image_path* and write the result to 'output/'.

        Pipeline: load -> downscale to 500px height -> find document contour
        (optionally user-adjusted) -> perspective-warp the original-resolution
        image -> grayscale -> sharpen -> adaptive threshold -> save under the
        same basename in OUTPUT_DIR.
        """

        RESCALED_HEIGHT = 500.0
        OUTPUT_DIR = 'output'

        # load the image and compute the ratio of the old height
        # to the new height, clone it, and resize it
        image = cv2.imread(image_path)

        # cv2.imread returns None on unreadable paths; fail fast here
        assert(image is not None)

        ratio = image.shape[0] / RESCALED_HEIGHT
        orig = image.copy()
        rescaled_image = imutils.resize(image, height = int(RESCALED_HEIGHT))

        # get the contour of the document
        screenCnt = self.get_contour(rescaled_image)

        if self.interactive:
            screenCnt = self.interactive_get_contour(screenCnt, rescaled_image)

        # apply the perspective transformation (scale the contour back up
        # to the original image's resolution first)
        warped = transform.four_point_transform(orig, screenCnt * ratio)

        # convert the warped image to grayscale
        gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)

        # sharpen image (unsharp mask: blur, then subtract a weighted blur)
        sharpen = cv2.GaussianBlur(gray, (0,0), 3)
        sharpen = cv2.addWeighted(gray, 1.5, sharpen, -0.5, 0)

        # apply adaptive threshold to get black and white effect
        thresh = cv2.adaptiveThreshold(sharpen, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 21, 15)

        # save the transformed image (OUTPUT_DIR is assumed to exist;
        # cv2.imwrite fails silently otherwise — TODO confirm)
        basename = os.path.basename(image_path)
        cv2.imwrite(OUTPUT_DIR + '/' + basename, thresh)
        print("Proccessed " + basename)
| 304 |
+
|
| 305 |
+
|
| 306 |
+
if __name__ == "__main__":
    # CLI: exactly one image source — a directory (--images) or a single
    # file (--image) — is required; -i enables interactive corner editing.
    parser = argparse.ArgumentParser()
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--images", help="Directory of images to be scanned")
    source.add_argument("--image", help="Path to single image to be scanned")
    parser.add_argument("-i", action='store_true',
        help = "Flag for manually verifying and/or setting document corners")

    options = vars(parser.parse_args())
    image_dir = options["images"]
    single_image_path = options["image"]
    scanner = DocScanner(options["i"])

    # Extensions accepted when scanning a whole directory.
    valid_formats = [".jpg", ".jpeg", ".jp2", ".png", ".bmp", ".tiff", ".tif"]

    def has_valid_format(filename):
        return os.path.splitext(filename)[1].lower() in valid_formats

    if single_image_path:
        # Scan single image specified by command line argument --image <IMAGE_PATH>
        scanner.scan(single_image_path)
    else:
        # Scan all valid images in directory specified by command line argument --images <IMAGE_DIR>
        for name in os.listdir(image_dir):
            if has_valid_format(name):
                scanner.scan(image_dir + '/' + name)