import gradio as gr
import numpy as np
import cv2
import imutils
from pdf2image import convert_from_path
from collections import namedtuple
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
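
# Pipeline overview: rasterize a two-page PDF form, align each page to its
# template with ORB feature matching and a RANSAC homography, binarize and
# denoise it, crop each labeled field, transcribe it with TrOCR, and draw the
# recognized text back onto the form for display in a Gradio interface.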


def convert_pdf_to_png(file_name):
    # Rasterize every PDF page and save it as "<stem>_<page>.png".
    pages = convert_from_path(file_name)
    for i, page in enumerate(pages):
        page.save(f"{file_name[:-4]}_{i+1}.png", "PNG")
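
# Usage sketch, assuming the uploaded scan is named "fich3.pdf":
#   convert_pdf_to_png("fich3.pdf")   # writes fich3_1.png and fich3_2.png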


def alinearImagen(image, template, maxFeatures=500, keepPercent=0.2, debug=False):
    # Work in grayscale for feature detection.
    imageGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    templateGray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    # Detect ORB keypoints and compute their binary descriptors.
    orb = cv2.ORB_create(maxFeatures)
    (kpsA, descsA) = orb.detectAndCompute(imageGray, None)
    (kpsB, descsB) = orb.detectAndCompute(templateGray, None)

    # Brute-force Hamming matching suits ORB's binary descriptors.
    method = cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING
    matcher = cv2.DescriptorMatcher_create(method)
    matches = matcher.match(descsA, descsB, None)

    # Sort by distance and keep only the best keepPercent of the matches.
    matches = sorted(matches, key=lambda x: x.distance)
    keep = int(len(matches) * keepPercent)
    matches = matches[:keep]

    if debug:
        matchedVis = cv2.drawMatches(image, kpsA, template, kpsB, matches, None)
        matchedVis = imutils.resize(matchedVis, width=1000)
        cv2.imshow("Matched Keypoints", matchedVis)
        cv2.waitKey(0)

    # Gather the matched point coordinates in both images.
    ptsA = np.zeros((len(matches), 2), dtype="float")
    ptsB = np.zeros((len(matches), 2), dtype="float")
    for (i, m) in enumerate(matches):
        ptsA[i] = kpsA[m.queryIdx].pt
        ptsB[i] = kpsB[m.trainIdx].pt

    # Estimate the homography with RANSAC and warp the scan onto the template.
    (H, mask) = cv2.findHomography(ptsA, ptsB, method=cv2.RANSAC)
    (h, w) = template.shape[:2]
    aligned = cv2.warpPerspective(image, H, (w, h))

    return aligned
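
# Usage sketch (hypothetical file names):
#   aligned = alinearImagen(cv2.imread("scan.png"), cv2.imread("plantilla_0.png"))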


def cleanup_text(text):
    # Strip non-ASCII characters so the result can be drawn with cv2.putText.
    return "".join([c if ord(c) < 128 else "" for c in text]).strip()
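
# e.g. cleanup_text("Año 1952") -> "Ao 1952"; OpenCV's Hershey fonts cover
# only ASCII and replace anything else with question marks.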


def aplicaFiltro(imagen):
    # OpenCV stores images as BGR, so the blue channel comes first.
    b, g, r = cv2.split(imagen)
    th = cv2.adaptiveThreshold(b, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                               cv2.THRESH_BINARY, 11, 7)
    resultado = cv2.fastNlMeansDenoising(th, None, 50)
    return resultado
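
# adaptiveThreshold(..., 11, 7) thresholds each pixel against an 11x11
# Gaussian-weighted neighborhood minus a constant of 7; h=50 is a fairly
# aggressive denoising strength, presumably tuned for these scanned forms.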


def recorte(imagen):
    # Invert-binarize with Otsu and find the outer contours of the marks.
    _, thresh1 = cv2.threshold(imagen, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)
    cnts = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)

    xs = []
    ys = []
    hs = []
    ws = []
    for c in cnts:
        # Keep only character-sized boxes; discard noise and ruled lines.
        (x, y, w, h) = cv2.boundingRect(c)
        if w >= 5 and h >= 5 and w < 100 and h < 30:
            xs.append(x)
            ys.append(y)
            hs.append(h)
            ws.append(w)

    if len(xs) > 0:
        # Approximate bounding box enclosing all accepted contours.
        x1 = min(xs)
        y1 = min(ys)
        h1 = max(ys) + max(hs) - y1
        w1 = max(xs) + max(ws) - x1
        im = imagen[y1:y1 + h1, x1:x1 + w1]
        return (im, True)
    else:
        return (None, False)
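
# recorte() returns (crop, True) when handwriting-sized contours are found,
# and (None, False) for an empty field, so callers can skip blank boxes.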


# Load the TrOCR handwritten-text checkpoint once at module import.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
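
# from_pretrained() fetches the checkpoint from the Hugging Face Hub on the
# first call and reuses the local cache afterwards.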


def OCR3_F(nombreFichero):
    # Gradio hands the upload over as a tempfile wrapper; .name is its path.
    file_name = nombreFichero.name
    convert_pdf_to_png(file_name)

    # Load the two rasterized pages of the form.
    fich1 = cv2.imread(file_name[:-4] + "_1.png")
    fich2 = cv2.imread(file_name[:-4] + "_2.png")

    # Align each page to its blank template, then stack the pages vertically
    # so the whole form can be processed as one image.
    plantilla1 = cv2.imread("plantilla_0.png")
    plantilla2 = cv2.imread("plantilla_1.png")

    fich1_alineado = alinearImagen(fich1, plantilla1)
    fich2_alineado = alinearImagen(fich2, plantilla2)

    fichero = cv2.vconcat([fich1_alineado, fich2_alineado])
    plantilla = cv2.vconcat([plantilla1, plantilla2])
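
    # The field coordinates below are in the template's pixel frame, which is
    # valid because both pages were just warped onto their templates.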
    # Each field: an id, its (x, y, w, h) box, and whether it is numeric-only.
    OCRLocation = namedtuple("OCRLocation", ["id", "bbox", "onlyNumber"])
    OCR_LOCATIONS = [
        OCRLocation("hojaCatastral", (1550, 55, 200, 32), True),
        OCRLocation("numeroParcela", (1550, 93, 200, 36), True),
        OCRLocation("idemPoligono", (1550, 134, 200, 35), True),
        OCRLocation("idemFotografia", (1550, 134, 200, 34), False),

        OCRLocation("terminoMunicipal", (1240, 254, 520, 38), False),
        OCRLocation("pago", (1300, 305, 460, 40), False),
        OCRLocation("partidoJudicial", (500, 312, 442, 38), False),

        OCRLocation("nombre", (386, 634, 604, 43), False),
        OCRLocation("pueblo", (1006, 632, 240, 44), False),

        OCRLocation("cultivos1", (212, 1373, 493, 43), False),
        OCRLocation("cultivos2", (212, 1426, 493, 42), False),
        OCRLocation("cultivos3", (212, 1480, 493, 41), False)
    ]

    # Binarize and denoise the whole form once.
    fichFiltro = aplicaFiltro(fichero)

    parsingResults = []
    for loc in OCR_LOCATIONS:
        # Extract this field from the filtered form.
        (x, y, w, h) = loc.bbox
        roi = fichFiltro[y:y + h, x:x + w]

        # Tighten the crop around the handwriting; skip empty fields.
        im, b = recorte(roi)
        if b:
            # The filtered crop is single-channel; TrOCR expects RGB input.
            rgb = cv2.cvtColor(im, cv2.COLOR_GRAY2RGB)
            pixel_values = processor(rgb, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values)
            text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

            for line in text.split("\n"):
                if len(line) == 0:
                    continue
                parsingResults.append((loc, line))

    # Merge multi-line results that belong to the same field.
    results = {}
    for (loc, line) in parsingResults:
        r = results.get(loc.id, None)
        if r is None:
            # Store the first line together with the field metadata.
            results[loc.id] = (line, loc._asdict())
        else:
            # Append this line to the text already collected for the field.
            (existingText, locDict) = r
            text = "{}\n{}".format(existingText, line)
            results[locDict["id"]] = (text, locDict)

    # Draw each field's box and its recognized text onto the aligned form.
    for (locID, result) in results.items():
        (text, loc) = result
        (x, y, w, h) = loc["bbox"]
        # putText cannot render non-ASCII glyphs, so draw the cleaned text.
        clean = cleanup_text(text)

        cv2.rectangle(fichero, (x, y), (x + w, y + h), (0, 255, 0), 2)
        for (i, line) in enumerate(clean.split("\n")):
            startY = y + (i * 70) + 40
            cv2.putText(fichero, line, (x, startY),
                        cv2.FONT_HERSHEY_SIMPLEX, 1.8, (0, 0, 255), 5)

    # Gradio's image output expects RGB, while OpenCV works in BGR.
    return cv2.cvtColor(fichero, cv2.COLOR_BGR2RGB)
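
# Launch a small web UI: upload a PDF form and get back the annotated image.
# The two example files are assumed to be present in the working directory.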
gr.Interface(fn=OCR3_F, inputs="file", outputs="image",
             examples=["fich3.pdf", "fich4.pdf"]).launch()