fredcaixeta committed on
Commit
09381b1
·
1 Parent(s): 84a0fc4
Files changed (4) hide show
  1. app.py +4 -3
  2. deskew_imagem.py +83 -0
  3. ocr_script.py +50 -0
  4. packages.txt +1 -0
app.py CHANGED
@@ -1,8 +1,9 @@
1
  import gradio as gr
 
2
 
3
- def ocr_tesseract_only(img): # img pode ser PIL/np/str conforme 'type'
4
- # chamar seu OCR aqui e retornar texto
5
- return "texto extraído"
6
 
7
  with gr.Blocks() as demo:
8
  gr.Markdown("## Text OCR Tesseract only")
 
1
  import gradio as gr
2
+ from ocr_script import ocr_tesseract_only
3
 
4
+ # def ocr_tesseract_only(img): # img pode ser PIL/np/str conforme 'type'
5
+ # # chamar seu OCR aqui e retornar texto
6
+ # return "texto extraído"
7
 
8
  with gr.Blocks() as demo:
9
  gr.Markdown("## Text OCR Tesseract only")
deskew_imagem.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from matplotlib import pyplot as plt
3
+ import regex as re
4
+ import cv2
5
+ import pytesseract
6
import os

# Location of the Tesseract binary for a default Windows install.
# pytesseract.pytesseract.tesseract_cmd is a process-wide setting, so it is
# only overridden when that file actually exists; on any other machine
# pytesseract keeps its own default and finds `tesseract` through PATH.
_WINDOWS_TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(_WINDOWS_TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT_EXE
7
+
8
def display(im_path):
    """Show the image stored at ``im_path`` and return its pixel data.

    The figure is sized to (width / 100) x (height / 100) inches and holds a
    single axes, with decorations turned off, that fills the whole figure.

    Args:
        im_path: Image location handed to ``plt.imread``.

    Returns:
        The array produced by ``plt.imread``.
    """
    pixels_per_inch = 100
    pixels = plt.imread(im_path)
    rows, cols = pixels.shape[:2]

    # Figure size in inches derived from the image size in pixels.
    size_in_inches = (cols / float(pixels_per_inch), rows / float(pixels_per_inch))
    canvas = plt.figure(figsize=size_in_inches)

    # One axes spanning the full figure, without spines or ticks.
    axes = canvas.add_axes([0, 0, 1, 1])  # type: ignore
    axes.axis('off')
    axes.imshow(pixels, cmap='gray')

    plt.show()
    return pixels
28
+
29
def getSkewAngle(cvImage, debugPath="temp/boxes.jpg") -> float:
    """Estimate the rotation, in degrees, that straightens the text in ``cvImage``.

    The image is converted to gray scale, blurred and Otsu-thresholded
    (inverted), then dilated with a wide, short kernel so characters merge
    into line/paragraph blobs. The minimum-area rectangle around the largest
    blob supplies the skew angle.

    Args:
        cvImage: BGR image array (it is passed through ``cv2.COLOR_BGR2GRAY``).
        debugPath: Where to write a copy of the image with the bounding box
            of every detected blob drawn on it. Pass ``None`` to skip the
            drawing and the write entirely.

    Returns:
        The angle to hand to ``rotateImage``; ``0.0`` when no contour is
        found or the largest blob is axis-aligned.
    """
    gray = cv2.cvtColor(cvImage, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Larger kernel on the X axis merges characters of a line together;
    # smaller kernel on the Y axis keeps separate text blocks apart.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=2)

    contours, _ = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        # Blank image: nothing to measure, so report "no skew" instead of
        # failing on contours[0].
        return 0.0
    contours = sorted(contours, key=cv2.contourArea, reverse=True)

    if debugPath is not None:
        # Draw on a copy so the caller's image is left untouched.
        newImage = cvImage.copy()
        for c in contours:
            x, y, w, h = cv2.boundingRect(c)
            cv2.rectangle(newImage, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.imwrite(debugPath, newImage)

    # Largest contour first after the sort; its min-area box gives the angle.
    angle = cv2.minAreaRect(contours[0])[-1]

    # A box at angle a with sides (w, h) is the same box at a - 90 with sides
    # (h, w). Older OpenCV reports angles in [-90, 0), newer releases in
    # (0, 90]; fold either range into [-45, 45] so a slightly skewed page
    # never yields a near-90-degree correction.
    if angle < -45:
        angle += 90
    elif angle > 45:
        angle -= 90

    if angle == 0:
        return 0.0
    return -1.0 * angle
64
+
65
+ # Rotate the image around its center
66
def rotateImage(cvImage, angle: float):
    """Return a copy of ``cvImage`` rotated by ``angle`` about its center.

    The result has the same width and height as the input. Pixels are
    resampled with cubic interpolation and areas exposed by the rotation are
    filled by replicating the border.

    Args:
        cvImage: Image array to rotate; it is not modified.
        angle: Rotation angle passed to ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image array.
    """
    rotated = cvImage.copy()
    height, width = rotated.shape[:2]
    pivot = (width // 2, height // 2)
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        rotated,
        matrix,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
73
+
74
+ # Deskew image
75
def deskew(cvImage):
    """Load the image file at ``cvImage`` and return a deskewed copy of it.

    Args:
        cvImage: Path of the image to read (despite the name, this is the
            file location given to ``cv2.imread``, not an image array).

    Returns:
        The loaded image rotated by the angle from ``getSkewAngle``.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from the path.
    """
    image = cv2.imread(cvImage)
    if image is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with the path instead of deeper down with an
        # AttributeError on None.
        raise FileNotFoundError(f"Could not read image: {cvImage!r}")
    angle = getSkewAngle(image)
    return rotateImage(image, angle=angle)
79
+
80
if __name__ == "__main__":
    # Deskew the sample page and save the result alongside the input.
    outputPath = r"data/pagina_2_metade_deskew.png"
    fixed = deskew(r'data/pagina_2_metade.png')
    # cv2.imwrite returns False when the file could not be written, so only
    # report success once the write has actually succeeded.
    if not cv2.imwrite(outputPath, fixed):
        raise SystemExit(f"could not write {outputPath}")
    print("data/pagina_2_metade_deskew.png deskewed")
ocr_script.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import cv2
3
+ import pytesseract
4
+ from PIL import Image
5
+
6
def pil_to_cv_bgr(pil_img: Image.Image) -> np.ndarray:
    """Convert a PIL image into a BGR array for use with OpenCV.

    The image is first converted to RGB mode, turned into a NumPy array, and
    then has its channels reordered from RGB to BGR.

    Args:
        pil_img: Source PIL image.

    Returns:
        The image as a BGR array.
    """
    rgb_pixels = np.array(pil_img.convert("RGB"))
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
10
+
11
def getSkewAngle(cvImage: np.ndarray) -> float:
    """Estimate the rotation, in degrees, that straightens the text in ``cvImage``.

    The image is converted to gray scale, blurred and Otsu-thresholded
    (inverted), then dilated with a wide, short kernel so characters merge
    into line/paragraph blobs. The minimum-area rectangle around the largest
    blob supplies the skew angle.

    Args:
        cvImage: BGR image array (it is passed through ``cv2.COLOR_BGR2GRAY``).

    Returns:
        The angle to hand to ``rotateImage``; ``0.0`` when no contour is
        found or the largest blob is axis-aligned.
    """
    gray = cv2.cvtColor(cvImage, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Wide in X to join the characters of a line, short in Y to keep
    # separate blocks of text apart.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
    dilate = cv2.dilate(thresh, kernel, iterations=2)

    contours, _ = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return 0.0

    # Only the largest blob matters, so a full sort is unnecessary.
    largestContour = max(contours, key=cv2.contourArea)
    angle = cv2.minAreaRect(largestContour)[-1]

    # A box at angle a with sides (w, h) is the same box at a - 90 with sides
    # (h, w). Older OpenCV reports angles in [-90, 0), newer releases in
    # (0, 90]; fold either range into [-45, 45] so a slightly skewed page
    # never yields a near-90-degree correction.
    if angle < -45:
        angle += 90
    elif angle > 45:
        angle -= 90

    if angle == 0:
        return 0.0
    return -1.0 * angle
30
+
31
def rotateImage(cvImage: np.ndarray, angle: float) -> np.ndarray:
    """Return a copy of ``cvImage`` rotated by ``angle`` about its center.

    The result has the same width and height as the input. Pixels are
    resampled with cubic interpolation and areas exposed by the rotation are
    filled by replicating the border.

    Args:
        cvImage: Image array to rotate; it is not modified.
        angle: Rotation angle passed to ``cv2.getRotationMatrix2D``.

    Returns:
        The rotated image array.
    """
    rotated = cvImage.copy()
    height, width = rotated.shape[:2]
    pivot = (width // 2, height // 2)
    matrix = cv2.getRotationMatrix2D(pivot, angle, 1.0)
    return cv2.warpAffine(
        rotated,
        matrix,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )
38
+
39
def deskew_from_pil(pil_img: Image.Image) -> np.ndarray:
    """Deskew a PIL image and return the result as a BGR array.

    Args:
        pil_img: Source PIL image.

    Returns:
        The BGR conversion of ``pil_img`` rotated by the angle that
        ``getSkewAngle`` reports for it.
    """
    bgr = pil_to_cv_bgr(pil_img)
    correction = getSkewAngle(bgr)
    return rotateImage(bgr, correction)
43
+
44
def ocr_tesseract_only(pil_img: Image.Image, lang="por", config="--psm 6 --oem 3") -> str:
    """Deskew an image and return the text Tesseract reads from it.

    Args:
        pil_img: Image to read. A PIL image is expected; a NumPy array
            (assumed to be RGB, as PIL would interpret it) or a file path is
            also accepted and converted to a PIL image first. ``None``
            (for example a UI callback fired with no image) yields ``""``.
        lang: Tesseract language code; the matching language data must be
            installed on the host.
        config: Extra command-line options passed to Tesseract.

    Returns:
        The recognized text, or an empty string when no image was given.
    """
    if pil_img is None:
        return ""

    if isinstance(pil_img, np.ndarray):
        pil_img = Image.fromarray(pil_img)
    elif not isinstance(pil_img, Image.Image):
        # Anything else is treated as a path/file object; convert() forces
        # the pixel data to load before the file is closed.
        with Image.open(pil_img) as opened:
            pil_img = opened.convert("RGB")

    cv_img = deskew_from_pil(pil_img)
    # OpenCV works in BGR; switch back to RGB before handing the image to
    # Tesseract as a PIL image.
    cv_rgb = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
    pil_for_ocr = Image.fromarray(cv_rgb)
    return pytesseract.image_to_string(pil_for_ocr, lang=lang, config=config)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ tesseract-ocr python3-pil tesseract-ocr-eng tesseract-ocr-por tesseract-ocr-script-latn