Spaces:
Runtime error
Runtime error
Commit ·
2cf5420
1
Parent(s): 92e0245
Upload 4 files
Browse files- app.py +86 -5
- best.pt +3 -0
- requirements.txt +2 -1
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import re
|
| 3 |
import os
|
|
@@ -7,16 +8,92 @@ import torch
|
|
| 7 |
from PIL import Image
|
| 8 |
from pathlib import Path
|
| 9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
def check_image(image):
    """Load an uploaded file as image data, trying PDF conversion first.

    ``convert_from_path`` is called on ``image.name`` (JPEG output, size
    960x1280). If that raises, the upload is opened directly with PIL.

    Args:
        image: Uploaded file object exposing a ``name`` path attribute.

    Returns:
        Whatever ``convert_from_path`` returns on success (presumably a list
        of page images -- confirm against pdf2image), otherwise a single
        ``PIL.Image.Image``.
        NOTE(review): the two branches return different shapes, so callers
        must cope with both.
    """
    try:
        return convert_from_path(Path(image.name), fmt="jpeg", size=(960, 1280))
    except Exception:
        # Best-effort fallback for non-PDF uploads. Unlike a bare ``except:``
        # this does not swallow KeyboardInterrupt / SystemExit.
        return Image.open(image)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
| 18 |
def get_attributes(input_img):
|
| 19 |
-
access_token = str(os.environ.get('key'))
|
|
|
|
| 20 |
processor = DonutProcessor.from_pretrained("AA-supply/donut-finetuned-cedula", use_auth_token=access_token)
|
| 21 |
model = VisionEncoderDecoderModel.from_pretrained("AA-supply/donut-finetuned-cedula", use_auth_token=access_token)
|
| 22 |
|
|
@@ -25,8 +102,12 @@ def get_attributes(input_img):
|
|
| 25 |
model.eval()
|
| 26 |
model.to(device)
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
pixel_values = pixel_values.to(device)
|
| 31 |
print(pixel_values.size())
|
| 32 |
# prepare decoder inputs
|
|
|
|
| 1 |
+
|
| 2 |
import gradio as gr
|
| 3 |
import re
|
| 4 |
import os
|
|
|
|
| 8 |
from PIL import Image
|
| 9 |
from pathlib import Path
|
| 10 |
|
| 11 |
+
from models.experimental import attempt_load
|
| 12 |
+
from utils.datasets import LoadImage
|
| 13 |
+
from utils.general import check_img_size, non_max_suppression, scale_coords, set_logging
|
| 14 |
+
from utils.torch_utils import select_device
|
| 15 |
+
import cv2
|
| 16 |
+
# Required system package: sudo apt-get install poppler-utils

# Access token read from the environment. Left as None when the variable is
# unset: wrapping the lookup in str() would yield the literal string 'None',
# which would then be passed on as if it were a real token.
key = os.environ.get('key')
|
| 19 |
|
| 20 |
def check_image(image):
    """Load an uploaded file as a list of images, trying PDF conversion first.

    ``convert_from_path`` is called on ``image.name`` (JPEG output, size
    960x1280). If that raises, the upload is opened directly with PIL and
    wrapped in a one-element list.

    Args:
        image: Uploaded file object exposing a ``name`` path attribute.

    Returns:
        The result of ``convert_from_path`` on success (presumably a list of
        page images -- confirm against pdf2image), otherwise
        ``[PIL.Image.Image]``.
    """
    try:
        return convert_from_path(Path(image.name), fmt="jpeg", size=(960, 1280))
    except Exception:
        # Best-effort fallback for non-PDF uploads. Unlike a bare ``except:``
        # this does not swallow KeyboardInterrupt / SystemExit.
        return [Image.open(image)]
|
| 26 |
+
|
| 27 |
+
def crop(files='',             # input passed straight to LoadImage
         weights='yolov7.pt',  # model.pt path(s)
         classes=None,         # filter by class: --class 0, or --class 0 2 3
         imgsz=640,            # inference size (pixels)
         device='',            # cuda device, i.e. 0 or 0,1,2,3 or cpu
         conf_thres=0.25,      # object confidence threshold
         iou_thres=0.45,       # IOU threshold for NMS
         augment=False,        # augmented inference
         agnostic_nms=False):  # class-agnostic NMS
    """Run the detector on ``files`` and return one crop per detection.

    The model is loaded from ``weights`` with ``attempt_load``, each item
    yielded by ``LoadImage`` is run through it, non-max suppression is
    applied, and every surviving box is cut out of the original image.

    Args:
        files: Forwarded unchanged to ``LoadImage(files=...)``.
        weights: Checkpoint path handed to ``attempt_load``.
        classes: Optional class filter forwarded to ``non_max_suppression``.
        imgsz: Requested inference size; adjusted by ``check_img_size`` to
            fit the model stride.
        device: Device selector forwarded to ``select_device``.
        conf_thres: Confidence threshold for NMS.
        iou_thres: IoU threshold for NMS.
        augment: Forwarded to the model call as ``augment=``.
        agnostic_nms: Forwarded to NMS as ``agnostic=``.

    Returns:
        A list of ``[PIL.Image.Image, int]`` pairs: the RGB crop and its
        predicted class index.
    """
    # Initialize
    set_logging()
    device = select_device(device)
    half = device.type != 'cpu'  # half precision only supported on CUDA

    # Load model
    model = attempt_load(weights, map_location=device)  # load FP32 model
    stride = int(model.stride.max())  # model stride
    imgsz = check_img_size(imgsz, s=stride)  # check img_size

    if half:
        model.half()  # to FP16

    # Set dataloader
    dataset = LoadImage(files=files, img_size=imgsz, stride=stride)

    # Run the model once on a dummy input before real inference (non-CPU only)
    if device.type != 'cpu':
        model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))

    list_cropobj = []
    for img, img0s in dataset:
        img = torch.from_numpy(img).to(device)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        if img.ndimension() == 3:
            img = img.unsqueeze(0)

        # Inference
        with torch.no_grad():  # calculating gradients would cause a GPU memory leak
            pred = model(img, augment=augment)[0]

        # Apply NMS
        pred = non_max_suppression(pred, conf_thres, iou_thres,
                                   classes=classes, agnostic=agnostic_nms)

        # Process detections
        for det in pred:  # detections per image
            if not len(det):
                continue

            # Rescale boxes from the inference size to the img0s size
            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], img0s.shape).round()

            for *xyxy, _conf, cls in reversed(det):
                # Crop the detection out of the original image
                x1, y1, x2, y2 = (int(v) for v in xyxy)
                cropobj_bgr = img0s[y1:y2, x1:x2]
                if cropobj_bgr.size == 0:
                    # A zero-area box would make cv2.cvtColor raise
                    continue
                # The source applies BGR->RGB before handing the crop to PIL
                cropobj_rgb = cv2.cvtColor(cropobj_bgr, cv2.COLOR_BGR2RGB)
                list_cropobj.append([Image.fromarray(cropobj_rgb), int(cls)])

    return list_cropobj
|
| 92 |
+
|
| 93 |
|
| 94 |
def get_attributes(input_img):
|
| 95 |
+
#access_token = str(os.environ.get('key'))
|
| 96 |
+
access_token = key
|
| 97 |
processor = DonutProcessor.from_pretrained("AA-supply/donut-finetuned-cedula", use_auth_token=access_token)
|
| 98 |
model = VisionEncoderDecoderModel.from_pretrained("AA-supply/donut-finetuned-cedula", use_auth_token=access_token)
|
| 99 |
|
|
|
|
| 102 |
model.eval()
|
| 103 |
model.to(device)
|
| 104 |
|
| 105 |
+
images = check_image(input_img)
|
| 106 |
+
images = crop(weights="best.pt", files= images)
|
| 107 |
+
image_cedula = [img[0] for img in images if img[1]==0][0] #0 en 'img[1]==0' es el label de cedula, si se reemplaza por 1
|
| 108 |
+
#entrega licencias
|
| 109 |
+
|
| 110 |
+
pixel_values = processor(image_cedula, return_tensors="pt").pixel_values
|
| 111 |
pixel_values = pixel_values.to(device)
|
| 112 |
print(pixel_values.size())
|
| 113 |
# prepare decoder inputs
|
best.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00bc306cd607b5ec22b8b499b81689081e2b478dd712dfa832955dc6c37684b4
|
| 3 |
+
size 162520891
|
requirements.txt
CHANGED
|
@@ -3,4 +3,5 @@ transformers
|
|
| 3 |
numpy
|
| 4 |
datasets
|
| 5 |
pdf2image
|
| 6 |
-
poppler-utils
|
|
|
|
|
|
| 3 |
numpy
|
| 4 |
datasets
|
| 5 |
pdf2image
|
| 6 |
+
poppler-utils
|
| 7 |
+
opencv-python
|