Spaces:

fredcaixeta
/

ocr_extractor

Sleeping

App Files Files Community

fredcaixeta committed on Oct 9, 2025

Commit

09381b1

1 Parent(s): 84a0fc4

go

Browse files

Files changed (4) hide show

app.py +4 -3
deskew_imagem.py +83 -0
ocr_script.py +50 -0
packages.txt +1 -0

app.py CHANGED Viewed

@@ -1,8 +1,9 @@
 import gradio as gr
-def ocr_tesseract_only(img):  # img pode ser PIL/np/str conforme 'type'
-    # chamar seu OCR aqui e retornar texto
-    return "texto extraído"
 with gr.Blocks() as demo:
     gr.Markdown("## Text OCR Tesseract only")

 import gradio as gr
+from ocr_script import ocr_tesseract_only
+# def ocr_tesseract_only(img):  # img pode ser PIL/np/str conforme 'type'
+#     # chamar seu OCR aqui e retornar texto
+#     return "texto extraído"
 with gr.Blocks() as demo:
     gr.Markdown("## Text OCR Tesseract only")

deskew_imagem.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import pandas as pd
+from matplotlib import pyplot as plt
+import regex as re
+import cv2
+import pytesseract
+pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+def display(im_path):
+    dpi = 100
+    im_data = plt.imread(im_path)
+    height, width = im_data.shape[:2]
+    figsize = width / float(dpi), height / float(dpi)
+    # create a figure of the right size with one axes that takes up the full fig
+    fig = plt.figure(figsize = figsize)
+    ax = fig.add_axes([0, 0, 1, 1]) # type: ignore
+    # hide spines, ticks, etc
+    ax.axis('off')
+    ax.imshow(im_data, cmap='gray')
+    plt.show()
+    return im_data
+def getSkewAngle(cvImage) -> float:
+    # Prep image, copy, convert to gray scale, blur, and threshold
+    newImage = cvImage.copy()
+    gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)
+    blur = cv2.GaussianBlur(gray, (9, 9), 0)
+    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+    # Apply dilate to merge text into meaningful lines/paragraphs.
+    # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
+    # But use smaller kernel on Y axis to separate between different blocks of text
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
+    dilate = cv2.dilate(thresh, kernel, iterations=2)
+    # Find all contours
+    contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+    contours = sorted(contours, key = cv2.contourArea, reverse = True)
+    for c in contours:
+        rect = cv2.boundingRect(c)
+        x,y,w,h = rect
+        cv2.rectangle(newImage,(x,y),(x+w,y+h),(0,255,0),2)
+    # Find largest contour and surround in min area box
+    largestContour = contours[0]
+    #print (len(contours))
+    minAreaRect = cv2.minAreaRect(largestContour)
+    cv2.imwrite("temp/boxes.jpg", newImage)
+    # Determine the angle. Convert it to the value that was originally used to obtain skewed image
+    angle = minAreaRect[-1]
+    if angle < -45:
+        angle = 90 + angle
+    if angle == 90:
+        return 0
+    return -1.0 * angle
+# Rotate the image around its center
+def rotateImage(cvImage, angle: float):
+    newImage = cvImage.copy()
+    (h, w) = newImage.shape[:2]
+    center = (w // 2, h // 2)
+    M = cv2.getRotationMatrix2D(center, angle, 1.0)
+    newImage = cv2.warpAffine(newImage, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
+    return newImage
+# Deskew image
+def deskew(cvImage):
+    cvImage = cv2.imread(cvImage)
+    angle = getSkewAngle(cvImage)
+    return rotateImage(cvImage, angle=angle)
+if __name__ == "__main__":
+    fixed = deskew(r'data/pagina_2_metade.png')
+    print("data/pagina_2_metade_deskew.png deskewed")
+    cv2.imwrite(r"data/pagina_2_metade_deskew.png", fixed)

ocr_script.py ADDED Viewed

	@@ -0,0 +1,50 @@

+import numpy as np
+import cv2
+import pytesseract
+from PIL import Image
+def pil_to_cv_bgr(pil_img: Image.Image) -> np.ndarray:
+    arr = np.array(pil_img.convert("RGB"))              # RGB
+    cv_bgr = cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)       # BGR p/ OpenCV
+    return cv_bgr
+def getSkewAngle(cvImage: np.ndarray) -> float:
+    newImage = cvImage.copy()
+    gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)
+    blur = cv2.GaussianBlur(gray, (9, 9), 0)
+    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
+    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
+    dilate = cv2.dilate(thresh, kernel, iterations=2)
+    contours, _ = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+    contours = sorted(contours, key=cv2.contourArea, reverse=True)
+    if not contours:
+        return 0.0
+    largestContour = contours[0]
+    minAreaRect = cv2.minAreaRect(largestContour)
+    angle = minAreaRect[-1]
+    if angle < -45:
+        angle = 90 + angle
+    if angle == 90:
+        return 0.0
+    return -1.0 * angle
+def rotateImage(cvImage: np.ndarray, angle: float) -> np.ndarray:
+    newImage = cvImage.copy()
+    (h, w) = newImage.shape[:2]
+    center = (w // 2, h // 2)
+    M = cv2.getRotationMatrix2D(center, angle, 1.0)
+    newImage = cv2.warpAffine(newImage, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
+    return newImage
+def deskew_from_pil(pil_img: Image.Image) -> np.ndarray:
+    cv_img = pil_to_cv_bgr(pil_img)
+    angle = getSkewAngle(cv_img)
+    return rotateImage(cv_img, angle)
+def ocr_tesseract_only(pil_img: Image.Image, lang="por", config="--psm 6 --oem 3") -> str:
+    cv_img = deskew_from_pil(pil_img)
+    # Converter BGR -> RGB para PIL antes do Tesseract (pytesseract aceita PIL/numpy RGB)
+    cv_rgb = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
+    pil_for_ocr = Image.fromarray(cv_rgb)
+    text = pytesseract.image_to_string(pil_for_ocr, lang=lang, config=config)
+    return text

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ tesseract-ocr python3-pil tesseract-ocr-eng tesseract-ocr-script-latn