Spaces:
Sleeping
Sleeping
fredcaixeta committed on
Commit ·
09381b1
1
Parent(s): 84a0fc4
go
Browse files- app.py +4 -3
- deskew_imagem.py +83 -0
- ocr_script.py +50 -0
- packages.txt +1 -0
app.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
|
| 3 |
-
def ocr_tesseract_only(img): # img pode ser PIL/np/str conforme 'type'
|
| 4 |
-
|
| 5 |
-
|
| 6 |
|
| 7 |
with gr.Blocks() as demo:
|
| 8 |
gr.Markdown("## Text OCR Tesseract only")
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
from ocr_script import ocr_tesseract_only
|
| 3 |
|
| 4 |
+
# def ocr_tesseract_only(img): # img pode ser PIL/np/str conforme 'type'
|
| 5 |
+
# # chamar seu OCR aqui e retornar texto
|
| 6 |
+
# return "texto extraído"
|
| 7 |
|
| 8 |
with gr.Blocks() as demo:
|
| 9 |
gr.Markdown("## Text OCR Tesseract only")
|
deskew_imagem.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from matplotlib import pyplot as plt
|
| 3 |
+
import regex as re
|
| 4 |
+
import cv2
|
| 5 |
+
import pytesseract
|
| 6 |
+
import os

# Install location of the Tesseract binary used on the Windows development
# machine. Only override pytesseract's command when that file actually exists;
# on any other machine the override would point at a missing executable, so
# pytesseract is left with whatever command it is already configured to use.
_WINDOWS_TESSERACT_CMD = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(_WINDOWS_TESSERACT_CMD):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_CMD
|
| 7 |
+
|
| 8 |
+
def display(im_path):
    """Show the image stored at ``im_path`` in a borderless matplotlib figure.

    The figure size (in inches) is the image's pixel size divided by a fixed
    dpi value of 100. The image is drawn on a single axes that fills the whole
    figure, with the axis decorations turned off and a gray colormap.

    Args:
        im_path: Path of the image file, passed to ``plt.imread``.

    Returns:
        The pixel array returned by ``plt.imread``.
    """
    dpi = 100
    pixels = plt.imread(im_path)
    rows, cols = pixels.shape[:2]

    # figsize is (width, height) in inches.
    canvas = plt.figure(figsize=(cols / float(dpi), rows / float(dpi)))

    # One axes spanning the full figure, with spines, ticks, etc. hidden.
    axes = canvas.add_axes([0, 0, 1, 1])  # type: ignore
    axes.axis('off')
    axes.imshow(pixels, cmap='gray')

    plt.show()
    return pixels
|
| 28 |
+
|
| 29 |
+
def getSkewAngle(cvImage) -> float:
    """Estimate the skew angle of the text in an image.

    The image is converted to grayscale, blurred, and thresholded (inverted,
    Otsu). It is then dilated with a wide, short kernel so that characters
    merge into line/paragraph blobs. The angle is read from the minimum-area
    rectangle around the largest blob.

    As a debugging aid, the upright bounding box of every blob is drawn on a
    copy of the input and that copy is written to ``temp/boxes.jpg``.

    Args:
        cvImage: Image array; it is converted with ``cv2.COLOR_BGR2GRAY``, so
            it is treated as a BGR colour image.

    Returns:
        The negated rectangle angle as a float, or ``0.0`` when no contour is
        found or when the rectangle angle is exactly 90.
    """
    # Work on a copy: the debug rectangles below are drawn in place.
    newImage = cvImage.copy()
    gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Apply dilate to merge text into meaningful lines/paragraphs.
    # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
    # But use smaller kernel on Y axis to separate between different blocks of text
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=2)

    # Find all contours, largest first.
    contours, _ = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Nothing was detected (e.g. a blank page): report no skew instead of
        # failing on contours[0] below.
        return 0.0
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        cv2.rectangle(newImage, (x, y), (x + w, y + h), (0, 255, 0), 2)

    # Find largest contour and surround in min area box
    largestContour = contours[0]
    minAreaRect = cv2.minAreaRect(largestContour)
    cv2.imwrite("temp/boxes.jpg", newImage)

    # Determine the angle. Convert it to the value that was originally used to obtain skewed image
    angle = minAreaRect[-1]
    # NOTE(review): the range of angles reported by cv2.minAreaRect is believed
    # to differ between OpenCV releases; confirm this normalization against the
    # installed version.
    if angle < -45:
        angle = 90 + angle

    if angle == 90:
        return 0.0
    return -1.0 * angle
|
| 64 |
+
|
| 65 |
+
# Rotate the image around its center
|
| 66 |
+
def rotateImage(cvImage, angle: float):
    """Return a copy of ``cvImage`` rotated by ``angle`` about its centre.

    The output keeps the input's width and height. Resampling uses
    ``cv2.INTER_CUBIC`` and areas outside the source are filled with
    ``cv2.BORDER_REPLICATE``.

    Args:
        cvImage: Image array to rotate; it is not modified.
        angle: Rotation angle handed to ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image array.
    """
    rotated = cvImage.copy()
    rows, cols = rotated.shape[:2]
    pivot = (cols // 2, rows // 2)
    # Scale factor 1.0: rotate only, no zoom.
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        rotated,
        matrix,
        (cols, rows),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
|
| 73 |
+
|
| 74 |
+
# Deskew image
|
| 75 |
+
def deskew(cvImage):
    """Load the image file at ``cvImage`` and return a deskewed copy of it.

    Args:
        cvImage: Path of the image file to load. Despite the name, this is
            what gets passed to ``cv2.imread``, not an image array.

    Returns:
        The loaded image rotated by the angle reported by ``getSkewAngle``.

    Raises:
        OSError: If ``cv2.imread`` could not load the file.
    """
    image = cv2.imread(cvImage)
    if image is None:
        # A failed read yields None here; fail with a clear message rather
        # than an AttributeError further down.
        raise OSError("Could not read image: {!r}".format(cvImage))
    angle = getSkewAngle(image)
    return rotateImage(image, angle=angle)
|
| 79 |
+
|
| 80 |
+
if __name__ == "__main__":
    # Deskew the sample page and store the result alongside it.
    source_path = r'data/pagina_2_metade.png'
    target_path = r"data/pagina_2_metade_deskew.png"

    fixed = deskew(source_path)
    print("data/pagina_2_metade_deskew.png deskewed")
    cv2.imwrite(target_path, fixed)
|
ocr_script.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import cv2
|
| 3 |
+
import pytesseract
|
| 4 |
+
from PIL import Image
|
| 5 |
+
|
| 6 |
+
def pil_to_cv_bgr(pil_img: Image.Image) -> np.ndarray:
    """Convert a PIL image into a BGR pixel array for OpenCV.

    Args:
        pil_img: Source image; it is first converted to RGB mode.

    Returns:
        The pixels as an array with the channels reordered from RGB to BGR.
    """
    rgb_pixels = np.array(pil_img.convert("RGB"))
    # OpenCV routines in this module expect BGR channel order.
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
|
| 10 |
+
|
| 11 |
+
def getSkewAngle(cvImage: np.ndarray) -> float:
    """Estimate the skew angle of the text in an image.

    The image is converted to grayscale, blurred, and thresholded (inverted,
    Otsu). It is then dilated with a wide, short kernel so that characters
    merge into line/paragraph blobs. The angle is read from the minimum-area
    rectangle around the blob with the largest contour area.

    Args:
        cvImage: Image array; it is converted with ``cv2.COLOR_BGR2GRAY``, so
            it is treated as a BGR colour image. It is not modified.

    Returns:
        The negated rectangle angle as a float, or ``0.0`` when no contour is
        found or when the rectangle angle is exactly 90.
    """
    gray = cv2.cvtColor(cvImage, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Wide (30) but short (5) kernel: joins characters along a line while
    # keeping separate lines/blocks apart.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=2)

    contours, _ = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Nothing detected (e.g. a blank page): report no skew.
        return 0.0

    # Only the largest blob is needed, so take the maximum instead of sorting
    # the whole list.
    largestContour = max(contours, key=cv2.contourArea)
    angle = cv2.minAreaRect(largestContour)[-1]

    # NOTE(review): the range of angles reported by cv2.minAreaRect is believed
    # to differ between OpenCV releases; confirm this normalization against the
    # installed version.
    if angle < -45:
        angle = 90 + angle
    if angle == 90:
        return 0.0
    return -1.0 * angle
|
| 30 |
+
|
| 31 |
+
def rotateImage(cvImage: np.ndarray, angle: float) -> np.ndarray:
    """Return a copy of ``cvImage`` rotated by ``angle`` about its centre.

    The output keeps the input's width and height. Resampling uses
    ``cv2.INTER_CUBIC`` and areas outside the source are filled with
    ``cv2.BORDER_REPLICATE``.

    Args:
        cvImage: Image array to rotate; it is not modified.
        angle: Rotation angle handed to ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image array.
    """
    rotated = cvImage.copy()
    rows, cols = rotated.shape[:2]
    pivot = (cols // 2, rows // 2)
    # Scale factor 1.0: rotate only, no zoom.
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        rotated,
        matrix,
        (cols, rows),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
|
| 38 |
+
|
| 39 |
+
def deskew_from_pil(pil_img: Image.Image) -> np.ndarray:
    """Deskew a PIL image and return the result as a BGR array.

    Args:
        pil_img: Image to straighten.

    Returns:
        The BGR conversion of ``pil_img`` rotated by the angle that
        ``getSkewAngle`` reports for it.
    """
    bgr = pil_to_cv_bgr(pil_img)
    return rotateImage(bgr, getSkewAngle(bgr))
|
| 43 |
+
|
| 44 |
+
def ocr_tesseract_only(pil_img: Image.Image, lang="por", config="--psm 6 --oem 3") -> str:
    """Deskew an image and return the text Tesseract recognizes in it.

    Args:
        pil_img: Image to read. ``None`` (no image supplied) is accepted and
            yields an empty string.
        lang: Language code passed to ``pytesseract.image_to_string``.
        config: Extra options passed to ``pytesseract.image_to_string``.

    Returns:
        The recognized text, or ``""`` when ``pil_img`` is ``None``.
    """
    # NOTE(review): the default lang "por" presumably needs the Portuguese
    # Tesseract language data installed on the host; confirm it is available.
    if pil_img is None:
        # No image to process: return empty text instead of failing on
        # pil_img.convert(...) inside the deskew step.
        return ""

    cv_img = deskew_from_pil(pil_img)
    # Convert BGR -> RGB and wrap in a PIL image before handing it to Tesseract.
    cv_rgb = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
    pil_for_ocr = Image.fromarray(cv_rgb)
    return pytesseract.image_to_string(pil_for_ocr, lang=lang, config=config)
|
packages.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
tesseract-ocr python3-pil tesseract-ocr-eng tesseract-ocr-script-latn
|