Spaces:

mapecim
/

trabajoIA

Runtime error

App Files Files Community

trabajoIA / app.py

mapecim

Update app.py

92e4f0d almost 3 years ago

raw

history blame contribute delete

6.97 kB

	import gradio as gr
	import numpy as np
	#import imutils
	import cv2
	import torch
	import matplotlib.pyplot as plt
	from pdf2image import convert_from_path, convert_from_bytes
	from collections import namedtuple
	import pytesseract
	import argparse

	def convert_pdf_to_png(file_name):
	    """Render every page of a PDF into its own PNG file.

	    The output path for page ``k`` (1-based) is ``file_name`` with its last
	    four characters removed, followed by ``_k.png``.

	    Args:
	        file_name: Path of the PDF, passed to ``convert_from_path``.
	    """
	    rendered_pages = convert_from_path(file_name)
	    # Strip the last four characters (a ".pdf"-style suffix) once, up front.
	    base_path = file_name[:-4]
	    for page_number, rendered in enumerate(rendered_pages, start=1):
	        rendered.save(f"{base_path}_{page_number}.png", "PNG")


	def alinearImagen(image, template, maxFeatures=500, keepPercent=0.2, debug=False):
	    """Warp ``image`` so that it lines up with ``template``.

	    ORB keypoints are detected on grayscale versions of both images, matched
	    with a brute-force Hamming matcher, and the closest matches are used to
	    estimate a RANSAC homography that is then applied to ``image``.

	    Args:
	        image: Image to align. It is converted with ``cv2.COLOR_BGR2GRAY``,
	            so a 3-channel BGR array is assumed.
	        template: Reference image, under the same assumption.
	        maxFeatures: Value passed to ``cv2.ORB_create``.
	        keepPercent: Fraction of the distance-sorted matches that is kept.
	        debug: When true, display the kept matches in a window and wait for
	            a key press before continuing.

	    Returns:
	        ``image`` warped with the estimated homography, sized to the
	        template's width and height.

	    Raises:
	        ValueError: If either image yields no descriptors, fewer than four
	            matches survive the filtering, or no homography can be found.
	    """
	    # Work on grayscale versions of both images.
	    imageGray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    templateGray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

	    # Detect keypoints and compute their binary descriptors with ORB.
	    orb = cv2.ORB_create(maxFeatures)
	    kpsA, descsA = orb.detectAndCompute(imageGray, None)
	    kpsB, descsB = orb.detectAndCompute(templateGray, None)
	    if descsA is None or descsB is None:
	        raise ValueError("No ORB features found in the image or the template")

	    # Match descriptors; a smaller distance means more similar regions.
	    matcher = cv2.DescriptorMatcher_create(
	        cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING
	    )
	    matches = sorted(matcher.match(descsA, descsB, None), key=lambda m: m.distance)

	    # Keep only the best fraction of the matches.
	    matches = matches[:int(len(matches) * keepPercent)]

	    if debug:
	        # Show the kept matches, resized to 1000 px wide with plain OpenCV
	        # calls so the debug path needs no extra packages.
	        matchedVis = cv2.drawMatches(image, kpsA, template, kpsB, matches, None)
	        visH, visW = matchedVis.shape[:2]
	        scale = 1000 / float(visW)
	        matchedVis = cv2.resize(
	            matchedVis, (1000, int(visH * scale)), interpolation=cv2.INTER_AREA
	        )
	        cv2.imshow("Matched Keypoints", matchedVis)
	        cv2.waitKey(0)

	    # A homography needs at least four point correspondences.
	    if len(matches) < 4:
	        raise ValueError(
	            "Not enough keypoint matches to estimate a homography "
	            "(got {}, need at least 4)".format(len(matches))
	        )

	    # Coordinates of the matched keypoints: row i of ptsA maps to row i of ptsB.
	    ptsA = np.array([kpsA[m.queryIdx].pt for m in matches], dtype="float")
	    ptsB = np.array([kpsB[m.trainIdx].pt for m in matches], dtype="float")

	    # Estimate the homography between the two point sets.
	    H, _ = cv2.findHomography(ptsA, ptsB, method=cv2.RANSAC)
	    if H is None:
	        raise ValueError("Could not estimate a homography from the matches")

	    # Warp the input image into the template's frame and size.
	    h, w = template.shape[:2]
	    return cv2.warpPerspective(image, H, (w, h))

	def cleanup_text(text):
	    """Return ``text`` without non-ASCII characters and surrounding whitespace.

	    Every character whose code point is 128 or above is dropped, then the
	    result is stripped of leading and trailing whitespace.
	    """
	    ascii_only = "".join(filter(str.isascii, text))
	    return ascii_only.strip()

	def aplicaFiltro(imagen):
	    """Binarize and denoise one channel of a three-channel image.

	    The third channel returned by ``cv2.split`` is adaptively thresholded
	    (Gaussian method, block size 11, constant 7) and the thresholded image
	    is then passed through ``cv2.fastNlMeansDenoising``.

	    Args:
	        imagen: Image that ``cv2.split`` separates into exactly three channels.

	    Returns:
	        The output of ``cv2.fastNlMeansDenoising`` for the thresholded channel.
	    """
	    # NOTE(review): for images in OpenCV's BGR order the third channel is
	    # red, not blue -- confirm which channel is intended.
	    _, _, canal = cv2.split(imagen)
	    binarizada = cv2.adaptiveThreshold(
	        canal, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 7
	    )
	    return cv2.fastNlMeansDenoising(binarizada, [50])

	def recorte(imagen):
	    """Crop ``imagen`` to the area spanned by its small, text-sized contours.

	    The image is binarized with an inverted Otsu threshold, external contours
	    are extracted, and only contours whose bounding box satisfies
	    ``5 <= w < 100`` and ``5 <= h < 30`` are kept.

	    Args:
	        imagen: Image to analyse. Assumed to be an 8-bit single-channel
	            array, as Otsu thresholding requires -- TODO confirm with callers.

	    Returns:
	        ``(crop, True)`` when at least one contour passes the size filter,
	        otherwise ``(None, False)``.
	    """
	    _, thresh1 = cv2.threshold(
	        imagen, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV
	    )
	    found = cv2.findContours(thresh1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    # findContours returns (contours, hierarchy) or, in OpenCV 3.x,
	    # (image, contours, hierarchy); the contours are always second from last.
	    cnts = found[-2]

	    # Bounding boxes of the contours that are plausibly pieces of text.
	    boxes = [cv2.boundingRect(c) for c in cnts]
	    boxes = [
	        (x, y, w, h)
	        for (x, y, w, h) in boxes
	        if 5 <= w < 100 and 5 <= h < 30
	    ]
	    if not boxes:
	        return (None, False)

	    xs, ys, ws, hs = zip(*boxes)
	    x1 = min(xs)
	    y1 = min(ys)
	    # NOTE(review): the extent combines the largest offset with the largest
	    # size, which can exceed the exact union of the boxes; slicing clamps
	    # to the image bounds.
	    h1 = max(ys) + max(hs) - y1
	    w1 = max(xs) + max(ws) - x1

	    return (imagen[y1:y1 + h1, x1:x1 + w1], True)

	from transformers import TrOCRProcessor, VisionEncoderDecoderModel
	import requests

	# Hugging Face checkpoint shared by the processor and the model below.
	TROCR_CHECKPOINT = "microsoft/trocr-base-handwritten"

	# Both objects are created once at import time and reused by OCR3_F.
	processor = TrOCRProcessor.from_pretrained(TROCR_CHECKPOINT)
	model = VisionEncoderDecoderModel.from_pretrained(TROCR_CHECKPOINT)

	def OCR3_F(nombreFichero):
	    """Run handwriting OCR over the fixed fields of a two-page scanned form.

	    The PDF is rendered to PNG pages, each page is aligned against its
	    template image (``plantilla_0.png`` / ``plantilla_1.png``), the aligned
	    pages are stacked vertically, and every predefined region is filtered,
	    recognised with the module-level TrOCR ``processor``/``model`` and
	    annotated on the stacked image.

	    Args:
	        nombreFichero: Uploaded file. Either an object exposing the path as
	            ``.name`` (as L139 of the original assumed) or the path itself.

	    Returns:
	        The stacked, aligned pages with a green box around each recognised
	        field and the recognised text drawn in red, as an RGB array.

	    Raises:
	        ValueError: If the PDF did not yield two readable page images.
	        FileNotFoundError: If a template image cannot be read.
	    """
	    # First convert the given file to PNG, one image per page.
	    file_name = getattr(nombreFichero, "name", nombreFichero)
	    convert_pdf_to_png(file_name)

	    # Read the two generated page images; the path must be derived from
	    # the file name string, not from the uploaded file object.
	    base = file_name[:-4]
	    fich1 = cv2.imread(base + "_1.png")
	    fich2 = cv2.imread(base + "_2.png")
	    if fich1 is None or fich2 is None:
	        raise ValueError(
	            "Expected a two-page PDF; could not read both page images for "
	            "{!r}".format(file_name)
	        )

	    # Read the two templates.
	    plantilla1 = cv2.imread("plantilla_0.png")
	    plantilla2 = cv2.imread("plantilla_1.png")
	    if plantilla1 is None or plantilla2 is None:
	        raise FileNotFoundError(
	            "Template images plantilla_0.png / plantilla_1.png could not be read"
	        )

	    # Align each page with its corresponding template.
	    fich1_alineado = alinearImagen(fich1, plantilla1)
	    fich2_alineado = alinearImagen(fich2, plantilla2)

	    # Stack the aligned pages into a single image.
	    fichero = cv2.vconcat([fich1_alineado, fich2_alineado])

	    # Regions (x, y, w, h) of the stacked image on which OCR is applied.
	    OCRLocation = namedtuple("OCRLocation", ["id", "bbox", "onlyNumber"])
	    OCR_LOCATIONS = [
	        OCRLocation("hojaCatastral", (1550, 55, 200, 32), True),
	        OCRLocation("numeroParcela", (1550, 93, 200, 36), True),
	        OCRLocation("idemPoligono", (1550, 134, 200, 35), True),
	        # NOTE(review): almost identical bbox to "idemPoligono" above --
	        # looks like a copy/paste slip; confirm the intended coordinates.
	        OCRLocation("idemFotografia", (1550, 134, 200, 34), False),

	        OCRLocation("terminoMunicipal", (1240, 254, 520, 38), False),
	        OCRLocation("pago", (1300, 305, 460, 40), False),
	        OCRLocation("partidoJudicial", (500, 312, 442, 38), False),

	        OCRLocation("nombre", (386, 634, 604, 43), False),
	        OCRLocation("pueblo", (1006, 632, 240, 44), False),

	        OCRLocation("cultivos1", (212, 1373, 493, 43), False),
	        OCRLocation("cultivos2", (212, 1426, 493, 42), False),
	        OCRLocation("cultivos3", (212, 1480, 493, 41), False),
	    ]

	    fichFiltro = aplicaFiltro(fichero)

	    parsingResults = []
	    # Loop over the locations of the document we are going to OCR.
	    for loc in OCR_LOCATIONS:
	        # Extract the OCR region from the filtered, aligned image.
	        x, y, w, h = loc.bbox
	        roi = fichFiltro[y:y + h, x:x + w]
	        if roi.size == 0:
	            # The bbox lies outside the image; nothing to recognise.
	            continue

	        # Skip regions in which no text-sized contour is found.
	        _, found = recorte(roi)
	        if not found:
	            continue

	        # OCR the region with TrOCR. aplicaFiltro thresholds a single
	        # channel, so the region is expanded from gray to three channels.
	        rgb = cv2.cvtColor(roi, cv2.COLOR_GRAY2RGB)
	        pixel_values = processor(rgb, return_tensors="pt").pixel_values
	        generated_ids = model.generate(pixel_values)
	        text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

	        # Keep every non-empty line of the recognised text.
	        for line in text.split("\n"):
	            if line:
	                parsingResults.append((loc, line))

	    # Group the recognised lines by location id.
	    results = {}
	    for loc, line in parsingResults:
	        previous = results.get(loc.id)
	        if previous is None:
	            results[loc.id] = (line, loc._asdict())
	        else:
	            existingText, locDict = previous
	            results[loc.id] = ("{}\n{}".format(existingText, line), locDict)

	    # Annotate the stacked image with a box and the text of each field.
	    for text, locDict in results.values():
	        x, y, w, h = locDict["bbox"]
	        # Only the ASCII-cleaned text is drawn on the image.
	        clean = cleanup_text(text)

	        cv2.rectangle(fichero, (x, y), (x + w, y + h), (0, 255, 0), 2)

	        for i, line in enumerate(clean.split("\n")):
	            startY = y + (i * 70) + 40
	            cv2.putText(fichero, line, (x, startY),
	                        cv2.FONT_HERSHEY_SIMPLEX, 1.8, (0, 0, 255), 5)

	    # The image was read and annotated in BGR order; the Gradio image
	    # output interprets arrays as RGB, so convert before returning.
	    return cv2.cvtColor(fichero, cv2.COLOR_BGR2RGB)

	# Build the Gradio interface around OCR3_F and launch it.
	demo = gr.Interface(
	    fn=OCR3_F,
	    inputs="file",
	    outputs="image",
	    examples=["fich3.pdf", "fich4.pdf"],
	)
	demo.launch()