# keras-ocr utilities: image reading/fitting, box geometry, augmentation, and download helpers.
| # pylint: disable=invalid-name,too-many-branches,too-many-statements,too-many-arguments | |
| import os | |
| import io | |
| import typing | |
| import hashlib | |
| import urllib.request | |
| import urllib.parse | |
| import cv2 | |
| import imgaug | |
| import numpy as np | |
| import validators | |
| import typing_extensions as tx | |
| import matplotlib.pyplot as plt | |
| from shapely import geometry | |
| from scipy import spatial | |
def read(filepath_or_buffer: typing.Union[str, io.BytesIO, np.ndarray]):
    """Read a file into an RGB image array.

    Args:
        filepath_or_buffer: The path to the file, a URL, or any object
            with a `read` method (such as `io.BytesIO`). Arrays are
            returned unchanged (assumed to already be in RGB order).

    Returns:
        An HxWx3 RGB image as a numpy array.

    Raises:
        ValueError: If the file or buffer could not be decoded as an image.
        TypeError: If the input is not a path, URL, buffer, or array.
    """
    if isinstance(filepath_or_buffer, np.ndarray):
        return filepath_or_buffer
    if hasattr(filepath_or_buffer, "read"):
        image = np.asarray(bytearray(filepath_or_buffer.read()), dtype=np.uint8)  # type: ignore
        image = cv2.imdecode(image, cv2.IMREAD_UNCHANGED)
    elif isinstance(filepath_or_buffer, str):
        if validators.url(filepath_or_buffer):
            return read(urllib.request.urlopen(filepath_or_buffer))
        assert os.path.isfile(filepath_or_buffer), (
            "Could not find image at path: " + filepath_or_buffer
        )
        image = cv2.imread(filepath_or_buffer)
    else:
        raise TypeError(f"Unsupported input type: {type(filepath_or_buffer)}")
    # Both imread and imdecode return None (rather than raising) on failure;
    # fail fast here instead of crashing inside cvtColor with a cryptic error.
    if image is None:
        raise ValueError("Could not decode image from the provided input.")
    # IMREAD_UNCHANGED may yield grayscale (2D) or BGRA (4-channel) data, on
    # which cv2.COLOR_BGR2RGB would fail; normalize those cases explicitly.
    if len(image.shape) == 2:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    if image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_BGRA2RGB)
    return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
def get_rotated_width_height(box):
    """
    Returns the width and height of a rotated rectangle

    Args:
        box: A list of four points starting in the top left
            corner and moving clockwise.
    """
    top_left, top_right, bottom_right, bottom_left = box
    # Width: average length of the top and bottom edges.
    width = (
        np.linalg.norm(top_left - top_right)
        + np.linalg.norm(bottom_right - bottom_left)
    ) / 2
    # Height: average length of the left and right edges.
    height = (
        np.linalg.norm(top_left - bottom_left)
        + np.linalg.norm(top_right - bottom_right)
    ) / 2
    return int(width), int(height)
# pylint:disable=too-many-locals
def warpBox(
    image,
    box,
    target_height=None,
    target_width=None,
    margin=0,
    cval=None,
    return_transform=False,
    skip_rotate=False,
):
    """Warp a boxed region in an image given by a set of four points into
    a rectangle with a specified width and height. Useful for taking crops
    of distorted or rotated text.

    Args:
        image: The image from which to take the box
        box: A list of four points starting in the top left
            corner and moving clockwise.
        target_height: The height of the output rectangle
        target_width: The width of the output rectangle
        margin: Inset (in pixels) applied to the destination corners.
        cval: Fill value for the area not covered by the warped crop.
        return_transform: Whether to return the transformation
            matrix with the image.
        skip_rotate: If True, assume the box corners are already ordered.
    """
    is_color = len(image.shape) == 3
    if cval is None:
        cval = (0, 0, 0) if is_color else 0
    if not skip_rotate:
        box, _ = get_rotated_box(box)
    w, h = get_rotated_width_height(box)
    assert (target_width is None and target_height is None) or (
        target_width is not None and target_height is not None
    ), "Either both or neither of target width and height must be provided."
    if target_width is None and target_height is None:
        target_width, target_height = w, h
    # Uniform scale that fits the box into the target while keeping aspect.
    scale = min(target_width / w, target_height / h)
    destination = np.array(
        [
            [margin, margin],
            [scale * w - margin, margin],
            [scale * w - margin, scale * h - margin],
            [margin, scale * h - margin],
        ],
        dtype="float32",
    )
    M = cv2.getPerspectiveTransform(src=box, dst=destination)
    crop = cv2.warpPerspective(image, M, dsize=(int(scale * w), int(scale * h)))
    full_shape = (
        (target_height, target_width, 3) if is_color else (target_height, target_width)
    )
    # Paste the warped crop into the top-left of a cval-filled canvas.
    full = (np.zeros(full_shape) + cval).astype("uint8")
    full[: crop.shape[0], : crop.shape[1]] = crop
    return (full, M) if return_transform else full
def flatten(list_of_lists):
    """Flatten one level of nesting, concatenating the sublists in order."""
    flattened = []
    for sublist in list_of_lists:
        flattened.extend(sublist)
    return flattened
def combine_line(line):
    """Combine a set of boxes in a line into a single bounding
    box.

    Args:
        line: A list of (box, character) entries

    Returns:
        A (box, text) tuple
    """
    text = "".join(
        character if character is not None else "" for _, character in line
    )
    # Build an outline for the line: the top edge points (first two corners
    # of each box) left to right, then the bottom edge points (corners 4 and
    # 3 of each box) right to left.
    top_points = [coords[:2] for coords, _ in line]
    bottom_points = [np.array([coords[3], coords[2]]) for coords, _ in reversed(line)]
    outline = np.concatenate(top_points + bottom_points).astype("float32")
    anchor = outline[0]
    min_rectangle = cv2.minAreaRect(outline)
    corners = cv2.boxPoints(min_rectangle)
    # Roll the corners so the one nearest the original first point comes
    # first, putting the points in clockwise order.
    start_index = np.linalg.norm(corners - anchor, axis=1).argmin()
    corners = np.array(np.roll(corners, -start_index, 0))
    return corners, text
def drawAnnotations(image, predictions, ax=None):
    """Draw text annotations onto image.

    Args:
        image: The image on which to draw
        predictions: The predictions as provided by `pipeline.recognize`.
        ax: A matplotlib axis on which to draw.

    Returns:
        The matplotlib axis with the boxed image and text labels drawn on it.
    """
    if ax is None:
        _, ax = plt.subplots()
    ax.imshow(drawBoxes(image=image, boxes=predictions, boxes_format="predictions"))
    # Sort predictions top-to-bottom by each box's minimum y-coordinate.
    predictions = sorted(predictions, key=lambda p: p[1][:, 1].min())
    # Split words by which half of the image they start in, so labels can be
    # placed in the margin on the matching side.
    left = []
    right = []
    for word, box in predictions:
        if box[:, 0].min() < image.shape[1] / 2:
            left.append((word, box))
        else:
            right.append((word, box))
    ax.set_yticks([])
    ax.set_xticks([])
    for side, group in zip(["left", "right"], [left, right]):
        for index, (text, box) in enumerate(group):
            # Spread the labels evenly down the side, top to bottom.
            y = 1 - (index / len(group))
            # Convert the box's first corner to axes-fraction coordinates;
            # matplotlib's fraction y-axis points up, hence the flip.
            xy = box[0] / np.array([image.shape[1], image.shape[0]])
            xy[1] = 1 - xy[1]
            ax.annotate(
                text=text,
                xy=xy,
                xytext=(-0.05 if side == "left" else 1.05, y),
                xycoords="axes fraction",
                arrowprops={"arrowstyle": "->", "color": "r"},
                color="r",
                fontsize=14,
                horizontalalignment="right" if side == "left" else "left",
            )
    return ax
def drawBoxes(image, boxes, color=(255, 0, 0), thickness=5, boxes_format="boxes"):
    """Draw boxes onto an image.

    Args:
        image: The image on which to draw the boxes.
        boxes: The boxes to draw.
        color: The color for each box.
        thickness: The thickness for each box.
        boxes_format: The format used for providing the boxes. Options are
            "boxes" which indicates an array with shape(N, 4, 2) where N is the
            number of boxes and each box is a list of four points) as provided
            by `keras_ocr.detection.Detector.detect`, "lines" (a list of
            lines where each line itself is a list of (box, character) tuples) as
            provided by `keras_ocr.data_generation.get_image_generator`,
            or "predictions" where boxes is by itself a list of (word, box) tuples
            as provided by `keras_ocr.pipeline.Pipeline.recognize` or
            `keras_ocr.recognition.Recognizer.recognize_from_boxes`.
    """
    if len(boxes) == 0:
        return image
    canvas = image.copy()
    # Normalize the richer formats down to a flat list of boxes.
    if boxes_format == "lines":
        boxes = [box for line in boxes for box, _ in line]
    if boxes_format == "predictions":
        boxes = [box for _, box in boxes]
    for box in boxes:
        cv2.polylines(
            img=canvas,
            pts=box[np.newaxis].astype("int32"),
            color=color,
            thickness=thickness,
            isClosed=True,
        )
    return canvas
def adjust_boxes(
    boxes,
    scale=1,
    boxes_format: tx.Literal["boxes", "predictions", "lines"] = "boxes",
) -> typing.Union[
    np.ndarray,
    typing.List[typing.List[typing.Tuple[np.ndarray, str]]],
    typing.List[typing.Tuple[str, np.ndarray]],
]:
    """Adjust boxes using a given scale and offset.

    Args:
        boxes: The boxes to adjust
        boxes_format: The format for the boxes. See the `drawBoxes` function
            for an explanation on the options.
        scale: The scale to apply
    """
    # A unit scale is a no-op; return the input untouched.
    if scale == 1:
        return boxes
    if boxes_format == "boxes":
        return np.array(boxes) * scale
    if boxes_format == "lines":
        scaled_lines = []
        for line in boxes:
            scaled_lines.append(
                [(np.array(box) * scale, character) for box, character in line]
            )
        return scaled_lines
    if boxes_format == "predictions":
        scaled_predictions = []
        for word, box in boxes:
            scaled_predictions.append((word, np.array(box) * scale))
        return scaled_predictions
    raise NotImplementedError(f"Unsupported boxes format: {boxes_format}")
def augment(
    boxes,
    augmenter: imgaug.augmenters.meta.Augmenter,
    image=None,
    boxes_format="boxes",
    image_shape=None,
    area_threshold=0.5,
    min_area=None,
):
    """Augment an image and associated boxes together.

    Args:
        image: The image which we wish to apply the augmentation.
        boxes: The boxes that will be augmented together with the image
        boxes_format: The format for the boxes. See the `drawBoxes` function
            for an explanation on the options.
        image_shape: The shape of the input image if no image will be provided.
        area_threshold: Fraction of bounding box that we require to be
            in augmented image to include it.
        min_area: The minimum area for a character to be included.

    Returns:
        A (image_augmented, boxes_augmented) tuple. `image_augmented` is
        None when only `image_shape` was provided.
    """
    if image is None and image_shape is None:
        raise ValueError('One of "image" or "image_shape" must be provided.')
    # Freeze the augmenter's random state so the image and the boxes are
    # transformed identically.
    augmenter = augmenter.to_deterministic()
    if image is not None:
        image_augmented = augmenter(image=image)
        image_shape = image.shape[:2]
        image_augmented_shape = image_augmented.shape[:2]
    else:
        image_augmented = None
        # Without an image, infer the augmented canvas size by transforming
        # the bottom-right corner keypoint of the original shape.
        width_augmented, height_augmented = augmenter.augment_keypoints(
            imgaug.KeypointsOnImage.from_xy_array(
                xy=[[image_shape[1], image_shape[0]]], shape=image_shape
            )
        ).to_xy_array()[0]
        image_augmented_shape = (height_augmented, width_augmented)

    def box_inside_image(box):
        # Decide whether enough of the augmented box survives inside the
        # augmented image; returns (keep, clipped_box).
        area_before = cv2.contourArea(np.array(box, dtype="int32")[:, np.newaxis, :])
        if area_before == 0:
            return False, box
        clipped = box.copy()
        clipped[:, 0] = clipped[:, 0].clip(0, image_augmented_shape[1])
        clipped[:, 1] = clipped[:, 1].clip(0, image_augmented_shape[0])
        area_after = cv2.contourArea(np.array(clipped, dtype="int32")[:, np.newaxis, :])
        return ((area_after / area_before) >= area_threshold) and (
            min_area is None or area_after > min_area
        ), clipped

    def augment_box(box):
        # Apply the (deterministic) augmentation to a single box's corners.
        return augmenter.augment_keypoints(
            imgaug.KeypointsOnImage.from_xy_array(box, shape=image_shape)
        ).to_xy_array()

    if boxes_format == "boxes":
        # Transform every box, then keep only those sufficiently inside the
        # augmented image.
        boxes_augmented = [
            box
            for inside, box in [
                box_inside_image(box) for box in map(augment_box, boxes)
            ]
            if inside
        ]
    elif boxes_format == "lines":
        boxes_augmented = [
            [(augment_box(box), character) for box, character in line] for line in boxes
        ]
        boxes_augmented = [
            [
                (box, character)
                for (inside, box), character in [
                    (box_inside_image(box), character) for box, character in line
                ]
                if inside
            ]
            for line in boxes_augmented
        ]
        # Sometimes all the characters in a line are removed.
        boxes_augmented = [line for line in boxes_augmented if line]
    elif boxes_format == "predictions":
        boxes_augmented = [(word, augment_box(box)) for word, box in boxes]
        boxes_augmented = [
            (word, box)
            for word, (inside, box) in [
                (word, box_inside_image(box)) for word, box in boxes_augmented
            ]
            if inside
        ]
    else:
        raise NotImplementedError(f"Unsupported boxes format: {boxes_format}")
    return image_augmented, boxes_augmented
def pad(image, width: int, height: int, cval: int = 255):
    """Pad an image to a desired size. Raises an exception if image
    is larger than desired size.

    Args:
        image: The input image
        width: The output width
        height: The output height
        cval: The value to use for filling the image.

    Returns:
        The padded image, with the original in the top-left corner.
    """
    output_shape: typing.Union[typing.Tuple[int, int, int], typing.Tuple[int, int]]
    if len(image.shape) == 3:
        output_shape = (height, width, image.shape[-1])
    else:
        output_shape = (height, width)
    # Compare the *input* image against the requested output size. The
    # previous checks compared height/width against output_shape, which is
    # itself (height, width, ...), so they could never fail and oversized
    # inputs crashed later with a broadcast error instead.
    assert image.shape[0] <= height, "Input height must be less than output height."
    assert image.shape[1] <= width, "Input width must be less than output width."
    padded = np.zeros(output_shape, dtype=image.dtype) + cval
    padded[: image.shape[0], : image.shape[1]] = image
    return padded
def resize_image(image, max_scale, max_size):
    """Obtain the optimal resized image subject to a maximum scale
    and maximum size.

    Args:
        image: The input image
        max_scale: The maximum scale to apply
        max_size: The maximum size to return
    """
    largest_dimension = max(image.shape)
    if largest_dimension * max_scale > max_size:
        # The maximum size is the binding constraint.
        scale = max_size / largest_dimension
    else:
        # The maximum scale is the binding constraint.
        scale = max_scale
    resized = cv2.resize(
        image, dsize=(int(image.shape[1] * scale), int(image.shape[0] * scale))
    )
    return resized, scale
# pylint: disable=too-many-arguments
def fit(
    image,
    width: int,
    height: int,
    cval: int = 255,
    mode="letterbox",
    return_scale=False,
):
    """Obtain a new image, fit to the specified size.

    Args:
        image: The input image
        width: The new width
        height: The new height
        cval: The constant value to use to fill the remaining areas of
            the image
        mode: "letterbox" (pad the shorter side) or "crop" (trim the overflow)
        return_scale: Whether to return the scale used for the image

    Returns:
        The new image
    """
    x_scale = width / image.shape[1]
    y_scale = height / image.shape[0]
    if x_scale == 1 and y_scale == 1:
        # Already the requested size; nothing to do.
        return (image, 1) if return_scale else image
    # Choose which axis governs the resize: letterbox fits the whole image
    # inside the target, crop fills the target completely.
    if (x_scale <= y_scale and mode == "letterbox") or (
        x_scale >= y_scale and mode == "crop"
    ):
        scale = x_scale
        resize_width = width
        resize_height = x_scale * image.shape[0]
    else:
        scale = y_scale
        resize_height = height
        resize_width = y_scale * image.shape[1]
    resize_width, resize_height = int(resize_width), int(resize_height)
    if mode == "letterbox":
        fitted = np.zeros((height, width, 3), dtype="uint8") + cval
        image = cv2.resize(image, dsize=(resize_width, resize_height))
        fitted[: image.shape[0], : image.shape[1]] = image[:height, :width]
    elif mode == "crop":
        image = cv2.resize(image, dsize=(resize_width, resize_height))
        fitted = image[:height, :width]
    else:
        raise NotImplementedError(f"Unsupported mode: {mode}")
    return (fitted, scale) if return_scale else fitted
def read_and_fit(
    filepath_or_array: typing.Union[str, np.ndarray],
    width: int,
    height: int,
    cval: int = 255,
    mode="letterbox",
):
    """Read an image from disk and fit to the specified size.

    Args:
        filepath_or_array: The path to the image or a numpy array of shape HxWx3
        width: The new width
        height: The new height
        cval: The constant value to use to fill the remaining areas of
            the image
        mode: The mode to pass to "fit" (crop or letterbox)

    Returns:
        The new image
    """
    if isinstance(filepath_or_array, str):
        image = read(filepath_or_array)
    else:
        image = filepath_or_array
    return fit(image=image, width=width, height=height, cval=cval, mode=mode)
def sha256sum(filename):
    """Compute the sha256 hash for a file."""
    digest = hashlib.sha256()
    # Reuse one 128 KiB buffer and read into it to avoid per-chunk allocations.
    buffer = bytearray(128 * 1024)
    view = memoryview(buffer)
    with open(filename, "rb", buffering=0) as infile:
        while True:
            count = infile.readinto(view)  # type: ignore
            if not count:
                break
            digest.update(view[:count])
    return digest.hexdigest()
def get_default_cache_dir():
    """Return the cache directory, honoring the KERAS_OCR_CACHE_DIR
    environment variable and defaulting to ~/.keras-ocr."""
    default = os.path.expanduser(os.path.join("~", ".keras-ocr"))
    return os.environ.get("KERAS_OCR_CACHE_DIR", default)
def download_and_verify(url, sha256=None, cache_dir=None, verbose=True, filename=None):
    """Download a file to a cache directory and verify it with a sha256
    hash.

    Args:
        url: The file to download
        sha256: The sha256 hash to check. If the file already exists and the hash
            matches, we don't download it again.
        cache_dir: The directory in which to cache the file. The default is
            `~/.keras-ocr`.
        verbose: Whether to log progress
        filename: The filename to use for the file. By default, the filename is
            derived from the URL.

    Returns:
        The path to the downloaded (and verified, if a hash was given) file.
    """
    if cache_dir is None:
        cache_dir = get_default_cache_dir()
    if filename is None:
        filename = os.path.basename(urllib.parse.urlparse(url).path)
    filepath = os.path.join(cache_dir, filename)
    os.makedirs(os.path.split(filepath)[0], exist_ok=True)
    if verbose:
        print("Looking for " + filepath)
    # Skip the download if a cached file already matches the expected hash
    # (or exists, when no hash was given).
    if not os.path.isfile(filepath) or (sha256 and sha256sum(filepath) != sha256):
        if verbose:
            print("Downloading " + filepath)
        urllib.request.urlretrieve(url, filepath)
    # Verify the (possibly freshly downloaded) file when a hash was provided.
    assert sha256 is None or sha256 == sha256sum(
        filepath
    ), "Error occurred verifying sha256."
    return filepath
def get_rotated_box(
    points,
) -> typing.Tuple[np.ndarray, float,]:
    """Obtain the parameters of a rotated box.

    Args:
        points: The points to fit a minimum rotated rectangle around.

    Returns:
        The vertices of the rotated box in top-left,
        top-right, bottom-right, bottom-left order along
        with the angle of rotation about the bottom left corner.
    """
    try:
        # Fit the smallest rotated rectangle enclosing all the points;
        # drop the duplicated closing vertex of the exterior ring.
        mp = geometry.MultiPoint(points=points)
        pts = np.array(list(zip(*mp.minimum_rotated_rectangle.exterior.xy)))[
            :-1
        ]  # noqa: E501
    except AttributeError:
        # There weren't enough points for the minimum rotated rectangle function
        pts = points
    # The code below is taken from
    # https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py
    # sort the points based on their x-coordinates
    xSorted = pts[np.argsort(pts[:, 0]), :]
    # grab the left-most and right-most points from the sorted
    # x-coordinate points
    leftMost = xSorted[:2, :]
    rightMost = xSorted[2:, :]
    # now, sort the left-most coordinates according to their
    # y-coordinates so we can grab the top-left and bottom-left
    # points, respectively
    leftMost = leftMost[np.argsort(leftMost[:, 1]), :]
    (tl, bl) = leftMost
    # now that we have the top-left coordinate, use it as an
    # anchor to calculate the Euclidean distance between the
    # top-left and right-most points; by the Pythagorean
    # theorem, the point with the largest distance will be
    # our bottom-right point
    D = spatial.distance.cdist(tl[np.newaxis], rightMost, "euclidean")[0]
    (br, tr) = rightMost[np.argsort(D)[::-1], :]
    # return the coordinates in top-left, top-right,
    # bottom-right, and bottom-left order
    pts = np.array([tl, tr, br, bl], dtype="float32")
    # NOTE(review): if tl and bl share a y-coordinate this divides by zero
    # (numpy emits a warning and yields inf/nan rather than raising) —
    # confirm callers never pass an axis-degenerate box here.
    rotation = np.arctan((tl[0] - bl[0]) / (tl[1] - bl[1]))
    return pts, rotation
def fix_line(line):
    """Given a list of (box, character) tuples, return a revised
    line with a consistent ordering of left-to-right or top-to-bottom,
    with each box provided with (top-left, top-right, bottom-right, bottom-left)
    ordering.

    Returns:
        A tuple that is the fixed line as well as a string indicating
        whether the line is horizontal or vertical.
    """
    # Normalize every box's corner ordering first.
    normalized = [(get_rotated_box(box)[0], character) for box, character in line]
    centers = np.array([box.mean(axis=0) for box, _ in normalized])
    order_x = centers[:, 0].argsort()
    order_y = centers[:, 1].argsort()
    # Compare the spans of the sorted centers: whichever axis spreads the
    # characters further determines the reading direction.
    vertical_extent = np.diff(centers[order_y][:, 1]).sum()
    horizontal_extent = np.diff(centers[order_x][:, 0]).sum()
    if vertical_extent > horizontal_extent:
        return [normalized[idx] for idx in order_y], "vertical"
    return [normalized[idx] for idx in order_x], "horizontal"